From 3c5b46d3ca41ee6df5092845de647e1b32cb6fb6 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 1 Aug 2018 16:54:33 -0700 Subject: [PATCH 01/88] Updatng release history --- README.md | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 18e50ebe3..a822f6f97 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,35 @@ -# Docker Monitoring Agent for OMI Server +# AKS Container Health monitoring -### Code of Conduct +## Code of Conduct This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Release History + +### 7/31/2018 - Version microsoft/oms:ciprod07312018 +- Changes for node lost scenario (roll-up pod & container statuses as Unknown) +- Discover unscheduled pods +- KubeNodeInventory - delimit multiple true node conditions for node status +- UTF Encoding support for container logs +- Container environment variable truncated to 200K +- Handle json parsing errors for OMI provider for docker +- Test mode enablement for ACS-engine testing +- Latest OMS agent (1.6.0-163) +- Latest OMI (1.4.2.5) + + +### 6/7/2018 - Version microsoft/oms:ciprod06072018 +- Remove node-0 dependency +- Remove passing WSID & Key as environment variables and pass them as kubernetes secret (for non-AKS; we already pass them as secret for AKS) +- Please note that if you are manually deploying thru yaml you need to - +- Provide workspaceid & key as base64 encoded strings with in double quotes (.yaml has comments to do so as well) +- Provide cluster name twice (for each container – daemonset & replicaset) + +### 5/8/2018 - Version microsoft/oms:ciprod05082018 +- Kubernetes RBAC enablement +- Latest released omsagent (1.6.0-42) +- Bug fix 
so that we do not collect kube-system namespace container logs when kube api calls fail occasionally (Bug #215107) +- .yaml changes (for RBAC) From d31f5889ec2f9ff6981efc72f2166b0430bffae9 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 1 Aug 2018 16:52:40 -0700 Subject: [PATCH 02/88] fixing the plugin logs for emit stream --- source/code/plugin/in_cadvisor_perf.rb | 4 ++-- source/code/plugin/in_kube_nodes.rb | 7 ++++--- source/code/plugin/in_kube_podinventory.rb | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 01f2fa9f4..2e28650f6 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -55,10 +55,10 @@ def enumerate() end router.emit_stream(@tag, eventStream) if eventStream - if (ENV['ISTEST'] == true && eventStream.count > 0) + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("in_cadvisor_perf::emit-stream : Success @ #{Time.now.utc.iso8601}") end - rescue => errorStr $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 473978cbc..6cbad0897 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -99,9 +99,10 @@ def enumerate eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - if (ENV['ISTEST'] == true && eventStream.count > 0) - $log.info("in_kube_nodeinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") - end + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + $log.info("in_kube_nodeinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") + end end rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index a96a0b207..656d1aa48 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -190,7 +190,8 @@ def parse_and_emit_records(podInventory, serviceList) end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream - if (ENV['ISTEST'] == true && eventStream.count > 0) + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("in_kube_podinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") end rescue => errorStr From 11fd5f6d4e3dd0b4fe57c8f4a551d1da4e8fa41f Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 5 Aug 2018 00:37:52 -0700 Subject: [PATCH 03/88] updating log message --- source/code/plugin/in_cadvisor_perf.rb | 2 +- source/code/plugin/in_kube_nodes.rb | 2 +- source/code/plugin/in_kube_podinventory.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 2e28650f6..5b551f74e 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -57,7 +57,7 @@ def enumerate() router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("in_cadvisor_perf::emit-stream : Success @ #{Time.now.utc.iso8601}") + $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end rescue => errorStr $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 6cbad0897..edbbdd37f 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -101,7 +101,7 @@ def enumerate router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("in_kube_nodeinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end end rescue => errorStr diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 656d1aa48..f478705f6 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -192,7 +192,7 @@ def parse_and_emit_records(podInventory, serviceList) router.emit_stream(@tag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("in_kube_podinventory::emit-stream : Success @ #{Time.now.utc.iso8601}") + $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" From 87a9cf8ddb77f789a805b433ca4ff92556f7d8a0 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 16 Aug 2018 11:58:10 -0700 Subject: [PATCH 04/88] Remove Log Processing from fluentd configuration --- installer/conf/container.conf | 32 -- .../code/plugin/containerlogtailfilereader.rb | 396 ------------------ source/code/plugin/filter_container_log.rb | 42 -- 3 files changed, 470 deletions(-) delete mode 100644 source/code/plugin/containerlogtailfilereader.rb delete mode 100644 source/code/plugin/filter_container_log.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index a20fdbe5a..9eaed9b47 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -50,18 +50,6 @@ ] -# Container log -# Example line which matches the format: -# {"log"=>"Test 9th January\n", "stream"=>"stdout", "time"=>"2018-01-09T23:14:39.273429353Z", "ContainerID"=>"ee1ec26aa974af81b21fff24cef8ec78bf7ac1558b5de6f1eb1a5b28ecd6d559", "Image"=>"ubuntu", "Name"=>"determined_wilson", "SourceSystem"=>"Containers"} -# NOTE: The LogEntryTimeStamp is just being appended in the begining of the LogEntry field. 
This is the actual time the log was generated and the TimeGenerated field in Kusto is different - - type containerlog_sudo_tail - pos_file /var/opt/microsoft/docker-cimprov/state/ContainerLogFile.pos.log - tag oms.container.log - format /\"log\"=>\"(?.*)", \"stream\"=>\"(?.*)", \"time\"=>\"(?.*)", \"ContainerID\"=>\"(?.*)", \"Image\"=>\"(?.*)", \"Name\"=>\"(?.*)", \"SourceSystem\"=>\"(?.*)"}/ - run_interval 60s - - # Container host inventory type omi @@ -95,11 +83,6 @@ type filter_container -# Seperate filter for container log - - type filter_container_log - - type out_oms_api log_level debug @@ -152,21 +135,6 @@ max_retry_wait 9m - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_log*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level info diff --git a/source/code/plugin/containerlogtailfilereader.rb b/source/code/plugin/containerlogtailfilereader.rb deleted file mode 100644 index 2d55b1d73..000000000 --- a/source/code/plugin/containerlogtailfilereader.rb +++ /dev/null @@ -1,396 +0,0 @@ - -require 'optparse' -require 'json' -require 'logger' -require_relative 'omslog' -require 'fluent/filter' - -module ContainerLogTailscript - - class ContainerLogNewTail - def initialize(paths) - @paths = paths - @tails = {} - @pos_file = $options[:pos_file] - @read_from_head = $options[:read_from_head] - @pf = nil - @pf_file = nil - - @log = Logger.new(STDERR) - @log.formatter = proc do |severity, time, progname, msg| - "#{severity} #{msg}\n" - end - end - - attr_reader :paths - - def start - start_watchers(@paths) unless @paths.empty? 
- end - - def shutdown - @pf_file.close if @pf_file - end - - def setup_watcher(path, pe) - tw = TailWatcher.new(path, pe, @read_from_head, @log, &method(:receive_lines)) - tw.on_notify - tw - end - - def start_watchers(paths) - if @pos_file - @pf_file = File.open(@pos_file, File::RDWR|File::CREAT) - @pf_file.sync = true - @pf = PositionFile.parse(@pf_file) - end - paths.each { |path| - pe = nil - if @pf - pe = @pf[path] #pe is FilePositionEntry instance - if pe.read_inode.zero? - begin - pe.update(File::Stat.new(path).ino, 0) - rescue Errno::ENOENT - @log.warn "#{path} not found. Continuing without tailing it." - end - end - end - - @tails[path] = setup_watcher(path, pe) - } - end - - def receive_lines(lines, tail_watcher) - unless lines.empty? - puts lines - end - return true - end - - class TailWatcher - def initialize(path, pe, read_from_head, log, &receive_lines) - @path = path - @pe = pe || MemoryPositionEntry.new - @read_from_head = read_from_head - @log = log - @receive_lines = receive_lines - @rotate_handler = RotateHandler.new(path, log, &method(:on_rotate)) - @io_handler = nil - @containerIDFilePath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/" - end - - attr_reader :path - - def wrap_receive_lines(lines) - newLines = [] - containerID = @path.split('/').last.chomp('-json.log') - containerInspectInformation = @containerIDFilePath + containerID - tempContainerInfo = {} - begin - File.open(containerInspectInformation) { |f| tempContainerInfo = JSON.parse(f.readline)} - lines.each { |line| - unless line.empty? 
- newLine = {} - newLine = JSON.parse(line) - newLine["ContainerID"] = containerID - newLine["Image"] = tempContainerInfo["Image"] - newLine["Name"] = tempContainerInfo["ElementName"] - newLine["SourceSystem"] = "Containers" - newLines.push(newLine) - end - } - rescue Exception => e - #File doesn't exist or error in reading the data - @log.error "Caught exception when opening file -> #{e}" - end - @receive_lines.call(newLines, self) - end - - def on_notify - @rotate_handler.on_notify if @rotate_handler - return unless @io_handler - @io_handler.on_notify - end - - def on_rotate(io) - if io - # first time - stat = io.stat - fsize = stat.size - inode = stat.ino - - last_inode = @pe.read_inode - if @read_from_head - pos = 0 - @pe.update(inode, pos) - elsif inode == last_inode - # rotated file has the same inode number as the pos_file. - # seek to the saved position - pos = @pe.read_pos - elsif last_inode != 0 - # read data from the head of the rotated file. - pos = 0 - @pe.update(inode, pos) - else - # this is the first MemoryPositionEntry for the first time fluentd started. - # seeks to the end of the file to know where to start tailing - pos = fsize - @pe.update(inode, pos) - end - io.seek(pos) - @io_handler = IOHandler.new(io, @pe, @log, &method(:wrap_receive_lines)) - else - @io_handler = NullIOHandler.new - end - end - - class IOHandler - def initialize(io, pe, log, &receive_lines) - @log = log - @io = io - @pe = pe - @log = log - @read_lines_limit = 100 - @receive_lines = receive_lines - @buffer = ''.force_encoding('ASCII-8BIT') - @iobuf = ''.force_encoding('ASCII-8BIT') - @lines = [] - end - - attr_reader :io - - def on_notify - begin - read_more = false - if @lines.empty? - begin - while true - if @buffer.empty? 
- @io.readpartial(512, @buffer) - else - @buffer << @io.readpartial(512, @iobuf) - end - while line = @buffer.slice!(/.*?\n/m) - @lines << line - end - if @lines.size >= @read_lines_limit - # not to use too much memory in case the file is very large - read_more = true - break - end - end - rescue EOFError - end - end - - unless @lines.empty? - if @receive_lines.call(@lines) - @pe.update_pos(@io.pos - @buffer.bytesize) - @lines.clear - else - read_more = false - end - end - end while read_more - - rescue - @log.error "#{$!.to_s}" - close - end - - def close - @io.close unless @io.closed? - end - end - - class NullIOHandler - def initialize - end - - def io - end - - def on_notify - end - - def close - end - end - - class RotateHandler - def initialize(path, log, &on_rotate) - @path = path - @inode = nil - @fsize = -1 # first - @on_rotate = on_rotate - @log = log - end - - def on_notify - begin - stat = File.stat(@path) #returns a File::Stat object for the file named @path - inode = stat.ino - fsize = stat.size - rescue Errno::ENOENT - # moved or deleted - inode = nil - fsize = 0 - end - - begin - if @inode != inode || fsize < @fsize - # rotated or truncated - begin - io = File.open(@path) - rescue Errno::ENOENT - end - @on_rotate.call(io) - end - @inode = inode - @fsize = fsize - end - - rescue - @log.error "#{$!.to_s}" - end - end - end - - - class PositionFile - UNWATCHED_POSITION = 0xffffffffffffffff - - def initialize(file, map, last_pos) - @file = file - @map = map - @last_pos = last_pos - end - - def [](path) - if m = @map[path] - return m - end - - @file.pos = @last_pos - @file.write path - @file.write "\t" - seek = @file.pos - @file.write "0000000000000000\t0000000000000000\n" - @last_pos = @file.pos - - @map[path] = FilePositionEntry.new(@file, seek) - end - - def self.parse(file) - compact(file) - - map = {} - file.pos = 0 - file.each_line {|line| - m = /^([^\t]+)\t([0-9a-fA-F]+)\t([0-9a-fA-F]+)/.match(line) - next unless m - path = m[1] - seek = file.pos 
- line.bytesize + path.bytesize + 1 - map[path] = FilePositionEntry.new(file, seek) - } - new(file, map, file.pos) - end - - # Clean up unwatched file entries - def self.compact(file) - file.pos = 0 - existent_entries = file.each_line.map { |line| - m = /^([^\t]+)\t([0-9a-fA-F]+)\t([0-9a-fA-F]+)/.match(line) - next unless m - path = m[1] - pos = m[2].to_i(16) - ino = m[3].to_i(16) - # 32bit inode converted to 64bit at this phase - pos == UNWATCHED_POSITION ? nil : ("%s\t%016x\t%016x\n" % [path, pos, ino]) - }.compact - - file.pos = 0 - file.truncate(0) - file.write(existent_entries.join) - end - end - - # pos inode - # ffffffffffffffff\tffffffffffffffff\n - class FilePositionEntry - POS_SIZE = 16 - INO_OFFSET = 17 - INO_SIZE = 16 - LN_OFFSET = 33 - SIZE = 34 - - def initialize(file, seek) - @file = file - @seek = seek - end - - def update(ino, pos) - @file.pos = @seek - @file.write "%016x\t%016x" % [pos, ino] - end - - def update_pos(pos) - @file.pos = @seek - @file.write "%016x" % pos - end - - def read_inode - @file.pos = @seek + INO_OFFSET - raw = @file.read(INO_SIZE) - raw ? raw.to_i(16) : 0 - end - - def read_pos - @file.pos = @seek - raw = @file.read(POS_SIZE) - raw ? raw.to_i(16) : 0 - end - end - - class MemoryPositionEntry - def initialize - @pos = 0 - @inode = 0 - end - - def update(ino, pos) - @inode = ino - @pos = pos - end - - def update_pos(pos) - @pos = pos - end - - def read_pos - @pos - end - - def read_inode - @inode - end - end - end -end - -if __FILE__ == $0 - $options = {:read_from_head => false} - OptionParser.new do |opts| - opts.on("-p", "--posfile [POSFILE]") do |p| - $options[:pos_file] = p - end - opts.on("-h", "--[no-]readfromhead") do |h| - $options[:read_from_head] = h - end - end.parse! 
- a = ContainerLogTailscript::ContainerLogNewTail.new(ARGV) - a.start - a.shutdown -end - diff --git a/source/code/plugin/filter_container_log.rb b/source/code/plugin/filter_container_log.rb deleted file mode 100644 index 21e146a35..000000000 --- a/source/code/plugin/filter_container_log.rb +++ /dev/null @@ -1,42 +0,0 @@ -# frozen_string_literal: true - -require 'fluent/filter' - -module Fluent - require 'logger' - class PassThruFilter < Filter - Fluent::Plugin.register_filter('filter_container_log', self) - - def configure(conf) - super - end - - def start - super - @hostname = OMS::Common.get_hostname or "Unknown host" - end - - def shutdown - super - end - - def filter(tag, time, record) - begin - #Try to force utf-8 encoding on the string so that all characters can flow through to - #$log.info "before : #{record['LogEntry']}" - record['LogEntry'].force_encoding('UTF-8') - rescue - $log.error "Failed to convert record['LogEntry'] : '#{record['LogEntry']}' to UTF-8 using force_encoding." 
- $log.error "Current string encoding for record['LogEntry'] is #{record['LogEntry'].encoding}" - end - - record['Computer'] = @hostname - wrapper = { - "DataType"=>"CONTAINER_LOG_BLOB", - "IPName"=>"Containers", - "DataItems"=>[record.each{|k,v| record[k]=v}] - } - wrapper - end - end -end From 308be41fe87202ee6e289cc9c952a24910eed133 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 16 Aug 2018 12:01:14 -0700 Subject: [PATCH 05/88] Remove plugin references from base_container.data --- installer/datafiles/base_container.data | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index c49a8d1d0..ec0728c01 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -23,14 +23,11 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/filter_docker_log.rb; source/code/plugin/filter_docker_log.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_container.rb; source/code/plugin/filter_container.rb; 644; root; root -/opt/microsoft/omsagent/plugin/filter_container_log.rb; source/code/plugin/filter_container_log.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_podinventory.rb; source/code/plugin/in_kube_podinventory.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_events.rb; source/code/plugin/in_kube_events.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_logs.rb; source/code/plugin/in_kube_logs.rb; 644; root; root /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb; source/code/plugin/KubernetesApiClient.rb; 644; root; root -/opt/microsoft/omsagent/plugin/in_containerlog_sudo_tail.rb; source/code/plugin/in_containerlog_sudo_tail.rb; 644; root; root -/opt/microsoft/omsagent/plugin/containerlogtailfilereader.rb; source/code/plugin/containerlogtailfilereader.rb; 744; root; root /etc/opt/microsoft/docker-cimprov/container.conf; installer/conf/container.conf; 644; root; root @@ -88,15 +85,6 @@ 
WriteInstallInfo() { } WriteInstallInfo -#Setup sudo permission for containerlogtailfilereader -if [ -z $(cat /etc/sudoers.d/omsagent | grep /containerlogtailfilereader.rb) ] -then - chmod +w /etc/sudoers.d/omsagent - echo "#run containerlogtailfilereader.rb for docker-provider" >> /etc/sudoers.d/omsagent - echo "omsagent ALL=(ALL) NOPASSWD: /opt/microsoft/omsagent/ruby/bin/ruby /opt/microsoft/omsagent/plugin/containerlogtailfilereader.rb *" >> /etc/sudoers.d/omsagent - chmod 440 /etc/sudoers.d/omsagent -fi - # Get the state file in place with proper permissions touch /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt chmod 644 /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt From bcd1a3ff040eb25218cfffd5028394f7594075c7 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 14 Sep 2018 10:46:55 -0700 Subject: [PATCH 06/88] Dilipr/fluent bit log processing (#126) * Build out_oms.so and include in docker-cimprov package * Adding fluent-bit-config file to base container * PR Feedback * Adding out_oms.conf to base_container.data * PR Feedback * Making the critical section as small as possible * PR Feedback * Fixing the newline bug for Computer, and changing containerId to Id --- build/Makefile | 829 ++++++++++++------------ installer/conf/out_oms.conf | 6 + installer/conf/td-agent-bit.conf | 35 + installer/datafiles/base_container.data | 7 +- source/code/go/src/plugins/Makefile | 20 + source/code/go/src/plugins/glide.lock | 209 ++++++ source/code/go/src/plugins/glide.yaml | 15 + source/code/go/src/plugins/oms.go | 359 ++++++++++ source/code/go/src/plugins/out_oms.go | 57 ++ source/code/go/src/plugins/utils.go | 67 ++ 10 files changed, 1194 insertions(+), 410 deletions(-) create mode 100644 installer/conf/out_oms.conf create mode 100644 installer/conf/td-agent-bit.conf create mode 100644 source/code/go/src/plugins/Makefile create mode 100644 source/code/go/src/plugins/glide.lock create mode 100644 source/code/go/src/plugins/glide.yaml 
create mode 100644 source/code/go/src/plugins/oms.go create mode 100644 source/code/go/src/plugins/out_oms.go create mode 100644 source/code/go/src/plugins/utils.go diff --git a/build/Makefile b/build/Makefile index 9586c3b23..b5312cfe3 100644 --- a/build/Makefile +++ b/build/Makefile @@ -1,409 +1,420 @@ -# -*- mode: Makefile; -*- -# Copyright (c) Microsoft Corporation - -BASE_DIR := $(subst /build,,$(PWD)) -OMI_ROOT := $(shell cd ../../omi/Unix; pwd -P) -SCXPAL_DIR := $(shell cd ../../pal; pwd -P) - -PF_POSIX := 1 -include $(SCXPAL_DIR)/build/config.mak -include $(BASE_DIR)/build/config.mak -include $(SCXPAL_DIR)/build/Makefile.pal - -ifndef ENABLE_DEBUG -$(error "ENABLE_DEBUG is not set. Please re-run configure") -endif - -# Include the version file -include ../../docker.version - -ifndef CONTAINER_BUILDVERSION_STATUS -$(error "Is docker.version missing? Please re-run configure") -endif - -SOURCE_DIR := $(BASE_DIR)/source/code -TEST_DIR := $(BASE_DIR)/test/code - -PROVIDER_DIR := $(SOURCE_DIR)/providers -PROVIDER_TEST_DIR := $(TEST_DIR)/providers -PAL_INCLUDE_DIR := $(SCXPAL_DIR)/source/code/include -PAL_TESTUTILS_DIR := $(SCXPAL_DIR)/test/code/testutils - -INTERMEDIATE_DIR := $(BASE_DIR)/intermediate/$(BUILD_CONFIGURATION) -INTERMEDIATE_TESTFILES := $(INTERMEDIATE_DIR)/testfiles -TARGET_DIR := $(BASE_DIR)/target/$(BUILD_CONFIGURATION) -PROVIDER_LIBRARY := $(INTERMEDIATE_DIR)/libcontainer.so - -INSTALLER_TMPDIR := $(INTERMEDIATE_DIR)/installer_tmp - -# Include files - -INCLUDE_DEFINES := $(INTERMEDIATE_DIR)/defines.h - -# Compiler flags - -OMI_INCLUDE_FLAGS := -I$(OMI_ROOT)/output/include -PROVIDER_INCLUDE_FLAGS := -I$(PAL_INCLUDE_DIR) -I$(INTERMEDIATE_DIR) - -PROVIDER_TEST_INCLUDE_FLAGS := -Wmissing-include-dirs -Wno-non-virtual-dtor -I$(SCXPAL_DIR)/source/code/include -I$(INTERMEDIATE_DIR) -I$(SCXPAL_DIR)/test/ext/include -I$(OMI_ROOT)/output/include -I$(OMI_ROOT) -I$(OMI_ROOT)/common -I$(SCXPAL_DIR)/test/code/include $(PROVIDER_INCLUDE_FLAGS) -I$(PROVIDER_DIR) 
- -ifeq ($(ENABLE_DEBUG),1) -PROV_DEBUG_FLAGS := -g -endif - -COMPILE_FLAGS := $(PROV_DEBUG_FLAGS) -D_REENTRANT -fstack-protector-all -Wall -fno-nonansi-builtins -Woverloaded-virtual -Wformat -Wformat-security -Wcast-align -Wswitch-enum -Wshadow -Wwrite-strings -Wredundant-decls -Wcast-qual -fPIC -PROVIDER_COMPILE_FLAGS := $(COMPILE_FLAGS) - -LINK_LIBRARIES := -Wl,-rpath=/opt/omi/lib -L$(OMI_ROOT)/output/lib -lmicxx -L$(SCXPAL_TARGET_DIR) -lscxcore -lUtil -lscxassertabort -lrt -luuid -PROVIDER_TEST_LINK_LIBRARIES := -lbase -lpal -L$(SCXPAL_TARGET_DIR) -lscxcore $(SCXPAL_DIR)/test/ext/lib/linux/$(ARCH)/cppunit/libcppunit.a -lpthread -lrt -luuid - -SHARED_FLAGS := -shared - -# Support for installbuilder - -STAGING_DIR := $(INTERMEDIATE_DIR)/staging - -ifeq ($(ULINUX),1) - # For consistency, the architecture should be i686 (for x86) and x86_64 (for x64) - DOCKER_ARCH := $(shell echo $(PF_ARCH) | sed -e 's/x86$$/i686/' -e 's/x64$$/x86_64/') - OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).universal.$(DOCKER_ARCH) -else - PF_DISTRO_LC := $(shell echo $(PF_DISTRO) | tr A-Z a-z) - OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).$(PF_DISTRO_LC).$(PF_MAJOR).$(PF_ARCH) -endif - -ifeq ("$(wildcard /usr/bin/dpkg-deb)","") - DPKG_LOCATION="--DPKG_LOCATION=$(SCXPAL_DIR)/installer/InstallBuilder/tools/bin/dpkg-deb-$(PF_ARCH)" -else - DPKG_LOCATION= -endif - -# Support for src_to_obj handling - -INCLUDES = $(OMI_INCLUDE_FLAGS) $(PROVIDER_INCLUDE_FLAGS) -CFLAGS = $(COMPILE_FLAGS) -CXXFLAGS = $(COMPILE_FLAGS) - -#-------------------------------------------------------------------------------- -# Build targets - -ifeq ($(ULINUX),1) -all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit -else -all : 
$(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) -endif - -clean : - $(RMDIR) $(BASE_DIR)/build/cppunit_result.* $(BASE_DIR)/build/scxtestrunner.log $(BASE_DIR)/installer/intermediate $(BASE_DIR)/intermediate $(BASE_DIR)/target $(PROVIDER_TEST_DIR)/providertestutils.cpp - -find $(BASE_DIR) -name \*~ -exec rm {} \; - -$(RM) $(TEST_DIR)/providers/TestScriptPath.h - -distclean : clean - $(RM) $(BASE_DIR)/build/config.mak - -make -C $(OMI_ROOT) distclean - -make -C $(SCXPAL_DIR)/build distclean - -$(RMDIR) $(OMI_ROOT)/output* - -$(RM) $(SCXPAL_DIR)/build/config.mak - -$(RM) $(SCXPAL_DIR)/build/Makefile.config_cache - -PROVIDER_STATUS: - @echo "========================= Performing Building provider" - -KIT_STATUS: - @echo "========================= Performing Building provider tests" - -#-------------------------------------------------------------------------------- -# OMI build -# -# Build the OMI distribution -# -# Technically, we should go to build OMI all the time. But I'd rather not spend -# the time doing it here EVERY TIME, when we never normally change OMI. This is -# a good tradeoff (build if not built, otherwise assume all is well). -# -# Doing a 'make clean' in OMI directory will force us to rebuild. - -$(OMI_ROOT)/output : $(OMI_ROOT)/output/lib/libmicxx.so - -$(OMI_ROOT)/output/lib/libmicxx.so : - @echo "========================= Performing Building OMI" - make -C $(OMI_ROOT) -ifeq ($(PERFORM_OMI_MAKEINSTALL),1) - make -C $(OMI_ROOT) install -endif - -#-------------------------------------------------------------------------------- -# PAL build -# -# Build the PAL (Platform Abstraction Layer) -# -# Doing a 'make clean' in PAL directory will force us to rebuild. 
- -$(SCXPAL_INTERMEDIATE_DIR) : - @echo "========================= Performing Building PAL" - make -C $(SCXPAL_DIR)/build - -#================================================================================ -# File depends.h (compiler dependencies) -#================================================================================ - -$(INCLUDE_DEFINES) : $(BASE_DIR)/build/config.mak - -$(MKPATH) $(@D) - @$(ECHO) "Creating $@" - @$(call pf_fwrite,"/*-------------------------------------------------------------------------------", $@) - @$(call pf_fappend," Copyright (C) 2007-2015 Microsoft Corp. ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"*/ ", $@) - @$(call pf_fappend,"/** ", $@) - @$(call pf_fappend," \file ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend," \brief Auto generated file containing build definitions ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend," \author Automated Build System ", $@) - @$(call pf_fappend," ", $@) - @$(call pf_fappend," DO NOT EDIT THIS FILE! ", $@) - @$(call pf_fappend," DO NOT CHECK IN THIS FILE! 
", $@) - @$(call pf_fappend,"*/ ", $@) - @$(call pf_fappend,"/*----------------------------------------------------------------------------*/", $@) - @$(call pf_fappend,"#ifndef DEFINES_H ", $@) - @$(call pf_fappend,"#define DEFINES_H ", $@) - @$(call pf_fappend," ", $@) -ifneq ($(PF_DISTRO),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef PF_DISTRO_$(PF_DISTRO) ", $@) - @$(call pf_fappend,"#define PF_DISTRO_$(PF_DISTRO) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifneq ($(PF_MAJOR),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef PF_MAJOR ", $@) - @$(call pf_fappend,"#define PF_MAJOR $(PF_MAJOR) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifneq ($(PF_MINOR),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef PF_MINOR ", $@) - @$(call pf_fappend,"#define PF_MINOR $(PF_MINOR) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifneq ($(ARCH),) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef $(ARCH) ", $@) - @$(call pf_fappend,"#define $(ARCH) ", $@) - @$(call pf_fappend,"#endif ", $@) -endif -ifeq ($(BUILD_TYPE),Debug) - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef _DEBUG ", $@) - @$(call pf_fappend,"#define _DEBUG ", $@) - @$(call pf_fappend,"#endif ", $@) -else - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#ifndef NDEBUG ", $@) - @$(call pf_fappend,"#define NDEBUG ", $@) - @$(call pf_fappend,"#endif ", $@) -endif - @$(call pf_fappend," ", $@) - @$(call pf_fappend,"#endif /* DEFINES_H */ ", $@) - @$(call pf_fappend,"/*----------------------------E-N-D---O-F---F-I-L-E---------------------------*/", $@) - -#================================================================================ -# Internal functions -#================================================================================ - -# Convert a list of src files with absolute paths under BASE_DIR to corresponding -# object files under intermediate directory -# src_to_obj(list_of_cppfiles) -src_to_obj = $(patsubst $(BASE_DIR)%, 
$(INTERMEDIATE_DIR)%, $(patsubst %.c, %.o, $(patsubst %.cpp, %.o, $(1)))) - -# No default rules, please -.SUFFIX: - -# Rule for compiling cpp files in source tree, ouptut in mirrored intermediate dir -$(INTERMEDIATE_DIR)/%.o : $(BASE_DIR)/%.cpp $(INCLUDE_DEFINES) - $(MKPATH) $(@D) - $(CXX) -c $(CXXFLAGS) $(INCLUDES) -I$( $(TEST_DIR)/providers/TestScriptPath.h - -test : TEST_STATUS $(SCXPAL_INTERMEDIATE_DIR) $(INTERMEDIATE_DIR)/testrunner - @echo "========================= Performing container testrun execution" - $(MKPATH) $(INTERMEDIATE_TESTFILES) - $(COPY) $(TEST_DIR)/scripts/createEnv.sh $(TEST_DIR)/scripts/testrun_wrapper $(INTERMEDIATE_TESTFILES) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(OMI_ROOT)/output/lib; cd $(INTERMEDIATE_TESTFILES); ./createEnv.sh - cd $(INTERMEDIATE_TESTFILES); ./testrun_wrapper $(INTERMEDIATE_DIR) - -#-------------------------------------------------------------------------------- -# Build the distribution kit -# -# Build the packages via installbuilder -# -# While the "formal build" only builds ULINUX, we may build something else for DEV purposes. -# Assume we ALWAYS build DPKG, but only build RPM if --enable-ulinux is speified in configure. 
- -kit : CONTAINERLIB_FILENAME = libcontainer.so -kit : $(OMI_ROOT)/output $(PROVIDER_LIBRARY) - -ifeq ($(ULINUX),1) - - @echo "========================= Performing Building RPM and DPKG packages" - $(MKPATH) $(INSTALLER_TMPDIR) - sudo $(RMDIR) $(STAGING_DIR) - $(MKPATH) $(INTERMEDIATE_DIR) - python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ - --BASE_DIR=$(BASE_DIR) \ - --TARGET_DIR=$(INTERMEDIATE_DIR) \ - --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ - --STAGING_DIR=$(STAGING_DIR) \ - --BUILD_TYPE=$(BUILD_TYPE) \ - --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ - --PFARCH=$(PF_ARCH) \ - --PFDISTRO=$(PF_DISTRO) \ - --PFMAJOR=$(PF_MAJOR) \ - --PFMINOR=$(PF_MINOR) \ - --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ - --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ - --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ - --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ - --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ - base_container.data linux.data linux_rpm.data - - sudo $(RMDIR) $(STAGING_DIR) - $(MKPATH) $(INTERMEDIATE_DIR) - python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ - --BASE_DIR=$(BASE_DIR) \ - --TARGET_DIR=$(INTERMEDIATE_DIR) \ - --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ - --STAGING_DIR=$(STAGING_DIR) \ - --BUILD_TYPE=$(BUILD_TYPE) \ - --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ - --PFARCH=$(PF_ARCH) \ - --PFDISTRO=$(PF_DISTRO) \ - --PFMAJOR=$(PF_MAJOR) \ - --PFMINOR=$(PF_MINOR) \ - --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ - --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ - --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ - $(DPKG_LOCATION) \ - --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ - --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ - base_container.data linux.data linux_dpkg.data - - # Strip the package extension from the package filename - sed -re 's/.rpm$$|.deb$$//' $(INTERMEDIATE_DIR)/package_filename 
> $(INTERMEDIATE_DIR)/package_file.tmp; mv $(INTERMEDIATE_DIR)/package_file.tmp $(INTERMEDIATE_DIR)/package_filename - - # Build the tar file containing both .rpm and .deb packages - cd $(INTERMEDIATE_DIR); tar cvf $(OUTPUT_PACKAGE_PREFIX).tar $(OUTPUT_PACKAGE_PREFIX).rpm $(OUTPUT_PACKAGE_PREFIX).deb - - ../installer/bundle/create_bundle.sh $(PF)_$(PF_DISTRO) $(INTERMEDIATE_DIR) $(OUTPUT_PACKAGE_PREFIX) - # Copy the shell bundle to the target directory - $(MKPATH) $(TARGET_DIR) - cd $(INTERMEDIATE_DIR); $(COPY) `cat $(INTERMEDIATE_DIR)/package_filename`.sh $(TARGET_DIR) - -else - - @echo "========================= Performing Building RPM and DPKG packages" - sudo $(RMDIR) $(STAGING_DIR) - $(MKPATH) $(INTERMEDIATE_DIR) - python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ - --BASE_DIR=$(BASE_DIR) \ - --TARGET_DIR=$(INTERMEDIATE_DIR) \ - --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ - --STAGING_DIR=$(STAGING_DIR) \ - --BUILD_TYPE=$(BUILD_TYPE) \ - --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ - --PFARCH=$(PF_ARCH) \ - --PFDISTRO=$(PF_DISTRO) \ - --PFMAJOR=$(PF_MAJOR) \ - --PFMINOR=$(PF_MINOR) \ - --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ - --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ - --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ - $(DPKG_LOCATION) \ - --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ - --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ - base_container.data linux.data linux_dpkg.data - -endif +# -*- mode: Makefile; -*- +# Copyright (c) Microsoft Corporation + +BASE_DIR := $(subst /build,,$(PWD)) +OMI_ROOT := $(shell cd ../../omi/Unix; pwd -P) +SCXPAL_DIR := $(shell cd ../../pal; pwd -P) + +PF_POSIX := 1 +include $(SCXPAL_DIR)/build/config.mak +include $(BASE_DIR)/build/config.mak +include $(SCXPAL_DIR)/build/Makefile.pal + +ifndef ENABLE_DEBUG +$(error "ENABLE_DEBUG is not set. 
Please re-run configure") +endif + +# Include the version file +include ../../docker.version + +ifndef CONTAINER_BUILDVERSION_STATUS +$(error "Is docker.version missing? Please re-run configure") +endif + +SOURCE_DIR := $(BASE_DIR)/source/code +TEST_DIR := $(BASE_DIR)/test/code + +PROVIDER_DIR := $(SOURCE_DIR)/providers +PROVIDER_TEST_DIR := $(TEST_DIR)/providers +PAL_INCLUDE_DIR := $(SCXPAL_DIR)/source/code/include +PAL_TESTUTILS_DIR := $(SCXPAL_DIR)/test/code/testutils + +INTERMEDIATE_DIR := $(BASE_DIR)/intermediate/$(BUILD_CONFIGURATION) +INTERMEDIATE_TESTFILES := $(INTERMEDIATE_DIR)/testfiles +TARGET_DIR := $(BASE_DIR)/target/$(BUILD_CONFIGURATION) +PROVIDER_LIBRARY := $(INTERMEDIATE_DIR)/libcontainer.so + +INSTALLER_TMPDIR := $(INTERMEDIATE_DIR)/installer_tmp + +# GO Source dir for custom fluent bit plugin +GO_SOURCE_DIR := $(SOURCE_DIR)/go/src/plugins + +# Include files + +INCLUDE_DEFINES := $(INTERMEDIATE_DIR)/defines.h + +# Compiler flags + +OMI_INCLUDE_FLAGS := -I$(OMI_ROOT)/output/include +PROVIDER_INCLUDE_FLAGS := -I$(PAL_INCLUDE_DIR) -I$(INTERMEDIATE_DIR) + +PROVIDER_TEST_INCLUDE_FLAGS := -Wmissing-include-dirs -Wno-non-virtual-dtor -I$(SCXPAL_DIR)/source/code/include -I$(INTERMEDIATE_DIR) -I$(SCXPAL_DIR)/test/ext/include -I$(OMI_ROOT)/output/include -I$(OMI_ROOT) -I$(OMI_ROOT)/common -I$(SCXPAL_DIR)/test/code/include $(PROVIDER_INCLUDE_FLAGS) -I$(PROVIDER_DIR) + +ifeq ($(ENABLE_DEBUG),1) +PROV_DEBUG_FLAGS := -g +endif + +COMPILE_FLAGS := $(PROV_DEBUG_FLAGS) -D_REENTRANT -fstack-protector-all -Wall -fno-nonansi-builtins -Woverloaded-virtual -Wformat -Wformat-security -Wcast-align -Wswitch-enum -Wshadow -Wwrite-strings -Wredundant-decls -Wcast-qual -fPIC +PROVIDER_COMPILE_FLAGS := $(COMPILE_FLAGS) + +LINK_LIBRARIES := -Wl,-rpath=/opt/omi/lib -L$(OMI_ROOT)/output/lib -lmicxx -L$(SCXPAL_TARGET_DIR) -lscxcore -lUtil -lscxassertabort -lrt -luuid +PROVIDER_TEST_LINK_LIBRARIES := -lbase -lpal -L$(SCXPAL_TARGET_DIR) -lscxcore 
$(SCXPAL_DIR)/test/ext/lib/linux/$(ARCH)/cppunit/libcppunit.a -lpthread -lrt -luuid + +SHARED_FLAGS := -shared + +# Support for installbuilder + +STAGING_DIR := $(INTERMEDIATE_DIR)/staging + +ifeq ($(ULINUX),1) + # For consistency, the architecture should be i686 (for x86) and x86_64 (for x64) + DOCKER_ARCH := $(shell echo $(PF_ARCH) | sed -e 's/x86$$/i686/' -e 's/x64$$/x86_64/') + OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).universal.$(DOCKER_ARCH) +else + PF_DISTRO_LC := $(shell echo $(PF_DISTRO) | tr A-Z a-z) + OUTPUT_PACKAGE_PREFIX=docker-cimprov-$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH)-$(CONTAINER_BUILDVERSION_BUILDNR).$(PF_DISTRO_LC).$(PF_MAJOR).$(PF_ARCH) +endif + +ifeq ("$(wildcard /usr/bin/dpkg-deb)","") + DPKG_LOCATION="--DPKG_LOCATION=$(SCXPAL_DIR)/installer/InstallBuilder/tools/bin/dpkg-deb-$(PF_ARCH)" +else + DPKG_LOCATION= +endif + +# Support for src_to_obj handling + +INCLUDES = $(OMI_INCLUDE_FLAGS) $(PROVIDER_INCLUDE_FLAGS) +CFLAGS = $(COMPILE_FLAGS) +CXXFLAGS = $(COMPILE_FLAGS) + +#-------------------------------------------------------------------------------- +# Build targets + +ifeq ($(ULINUX),1) +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) KIT_STATUS kit fluentbitplugin +else +all : $(OMI_ROOT)/output $(SCXPAL_INTERMEDIATE_DIR) PROVIDER_STATUS $(PROVIDER_LIBRARY) fluentbitplugin +endif + +clean : + $(RMDIR) $(BASE_DIR)/build/cppunit_result.* $(BASE_DIR)/build/scxtestrunner.log $(BASE_DIR)/installer/intermediate $(BASE_DIR)/intermediate $(BASE_DIR)/target $(PROVIDER_TEST_DIR)/providertestutils.cpp + -find $(BASE_DIR) -name \*~ -exec rm {} \; + -$(RM) $(TEST_DIR)/providers/TestScriptPath.h + +distclean : clean + $(RM) $(BASE_DIR)/build/config.mak + -make -C $(OMI_ROOT) distclean + -make -C $(SCXPAL_DIR)/build distclean + 
-$(RMDIR) $(OMI_ROOT)/output* + -$(RM) $(SCXPAL_DIR)/build/config.mak + -$(RM) $(SCXPAL_DIR)/build/Makefile.config_cache + +PROVIDER_STATUS: + @echo "========================= Performing Building provider" + +KIT_STATUS: + @echo "========================= Performing Building provider tests" + +#-------------------------------------------------------------------------------- +# OMI build +# +# Build the OMI distribution +# +# Technically, we should go to build OMI all the time. But I'd rather not spend +# the time doing it here EVERY TIME, when we never normally change OMI. This is +# a good tradeoff (build if not built, otherwise assume all is well). +# +# Doing a 'make clean' in OMI directory will force us to rebuild. + +$(OMI_ROOT)/output : $(OMI_ROOT)/output/lib/libmicxx.so + +$(OMI_ROOT)/output/lib/libmicxx.so : + @echo "========================= Performing Building OMI" + make -C $(OMI_ROOT) +ifeq ($(PERFORM_OMI_MAKEINSTALL),1) + make -C $(OMI_ROOT) install +endif + +#--------------------------------------------------------------------------------- +# fluentbit go plugin build. This is required to send container logs to ODS endpoint +# +fluentbitplugin : + @echo "========================= Building fluentbit out_oms go plugin for logs" + make -C $(GO_SOURCE_DIR) fbplugin + $(COPY) $(GO_SOURCE_DIR)/out_oms.so $(INTERMEDIATE_DIR) + +#-------------------------------------------------------------------------------- +# PAL build +# +# Build the PAL (Platform Abstraction Layer) +# +# Doing a 'make clean' in PAL directory will force us to rebuild. 
+ +$(SCXPAL_INTERMEDIATE_DIR) : + @echo "========================= Performing Building PAL" + make -C $(SCXPAL_DIR)/build + +#================================================================================ +# File depends.h (compiler dependencies) +#================================================================================ + +$(INCLUDE_DEFINES) : $(BASE_DIR)/build/config.mak + -$(MKPATH) $(@D) + @$(ECHO) "Creating $@" + @$(call pf_fwrite,"/*-------------------------------------------------------------------------------", $@) + @$(call pf_fappend," Copyright (C) 2007-2015 Microsoft Corp. ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"*/ ", $@) + @$(call pf_fappend,"/** ", $@) + @$(call pf_fappend," \file ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend," \brief Auto generated file containing build definitions ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend," \author Automated Build System ", $@) + @$(call pf_fappend," ", $@) + @$(call pf_fappend," DO NOT EDIT THIS FILE! ", $@) + @$(call pf_fappend," DO NOT CHECK IN THIS FILE! 
", $@) + @$(call pf_fappend,"*/ ", $@) + @$(call pf_fappend,"/*----------------------------------------------------------------------------*/", $@) + @$(call pf_fappend,"#ifndef DEFINES_H ", $@) + @$(call pf_fappend,"#define DEFINES_H ", $@) + @$(call pf_fappend," ", $@) +ifneq ($(PF_DISTRO),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef PF_DISTRO_$(PF_DISTRO) ", $@) + @$(call pf_fappend,"#define PF_DISTRO_$(PF_DISTRO) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifneq ($(PF_MAJOR),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef PF_MAJOR ", $@) + @$(call pf_fappend,"#define PF_MAJOR $(PF_MAJOR) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifneq ($(PF_MINOR),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef PF_MINOR ", $@) + @$(call pf_fappend,"#define PF_MINOR $(PF_MINOR) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifneq ($(ARCH),) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef $(ARCH) ", $@) + @$(call pf_fappend,"#define $(ARCH) ", $@) + @$(call pf_fappend,"#endif ", $@) +endif +ifeq ($(BUILD_TYPE),Debug) + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef _DEBUG ", $@) + @$(call pf_fappend,"#define _DEBUG ", $@) + @$(call pf_fappend,"#endif ", $@) +else + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#ifndef NDEBUG ", $@) + @$(call pf_fappend,"#define NDEBUG ", $@) + @$(call pf_fappend,"#endif ", $@) +endif + @$(call pf_fappend," ", $@) + @$(call pf_fappend,"#endif /* DEFINES_H */ ", $@) + @$(call pf_fappend,"/*----------------------------E-N-D---O-F---F-I-L-E---------------------------*/", $@) + +#================================================================================ +# Internal functions +#================================================================================ + +# Convert a list of src files with absolute paths under BASE_DIR to corresponding +# object files under intermediate directory +# src_to_obj(list_of_cppfiles) +src_to_obj = $(patsubst $(BASE_DIR)%, 
$(INTERMEDIATE_DIR)%, $(patsubst %.c, %.o, $(patsubst %.cpp, %.o, $(1)))) + +# No default rules, please +.SUFFIX: + +# Rule for compiling cpp files in source tree, output in mirrored intermediate dir +$(INTERMEDIATE_DIR)/%.o : $(BASE_DIR)/%.cpp $(INCLUDE_DEFINES) + $(MKPATH) $(@D) + $(CXX) -c $(CXXFLAGS) $(INCLUDES) -I$( $(TEST_DIR)/providers/TestScriptPath.h + +test : TEST_STATUS $(SCXPAL_INTERMEDIATE_DIR) $(INTERMEDIATE_DIR)/testrunner + @echo "========================= Performing container testrun execution" + $(MKPATH) $(INTERMEDIATE_TESTFILES) + $(COPY) $(TEST_DIR)/scripts/createEnv.sh $(TEST_DIR)/scripts/testrun_wrapper $(INTERMEDIATE_TESTFILES) + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(OMI_ROOT)/output/lib; cd $(INTERMEDIATE_TESTFILES); ./createEnv.sh + cd $(INTERMEDIATE_TESTFILES); ./testrun_wrapper $(INTERMEDIATE_DIR) + +#-------------------------------------------------------------------------------- +# Build the distribution kit +# +# Build the packages via installbuilder +# +# While the "formal build" only builds ULINUX, we may build something else for DEV purposes. +# Assume we ALWAYS build DPKG, but only build RPM if --enable-ulinux is specified in configure. 
+ +kit : CONTAINERLIB_FILENAME = libcontainer.so +kit : $(OMI_ROOT)/output $(PROVIDER_LIBRARY) fluentbitplugin + +ifeq ($(ULINUX),1) + + @echo "========================= Performing Building RPM and DPKG packages" + $(MKPATH) $(INSTALLER_TMPDIR) + sudo $(RMDIR) $(STAGING_DIR) + $(MKPATH) $(INTERMEDIATE_DIR) + python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ + --BASE_DIR=$(BASE_DIR) \ + --TARGET_DIR=$(INTERMEDIATE_DIR) \ + --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ + --STAGING_DIR=$(STAGING_DIR) \ + --BUILD_TYPE=$(BUILD_TYPE) \ + --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ + --PFARCH=$(PF_ARCH) \ + --PFDISTRO=$(PF_DISTRO) \ + --PFMAJOR=$(PF_MAJOR) \ + --PFMINOR=$(PF_MINOR) \ + --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ + --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ + --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ + --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ + --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ + base_container.data linux.data linux_rpm.data + + sudo $(RMDIR) $(STAGING_DIR) + $(MKPATH) $(INTERMEDIATE_DIR) + python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ + --BASE_DIR=$(BASE_DIR) \ + --TARGET_DIR=$(INTERMEDIATE_DIR) \ + --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ + --STAGING_DIR=$(STAGING_DIR) \ + --BUILD_TYPE=$(BUILD_TYPE) \ + --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ + --PFARCH=$(PF_ARCH) \ + --PFDISTRO=$(PF_DISTRO) \ + --PFMAJOR=$(PF_MAJOR) \ + --PFMINOR=$(PF_MINOR) \ + --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ + --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ + --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ + $(DPKG_LOCATION) \ + --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ + --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ + base_container.data linux.data linux_dpkg.data + + # Strip the package extension from the package filename + sed -re 's/.rpm$$|.deb$$//' 
$(INTERMEDIATE_DIR)/package_filename > $(INTERMEDIATE_DIR)/package_file.tmp; mv $(INTERMEDIATE_DIR)/package_file.tmp $(INTERMEDIATE_DIR)/package_filename + + # Build the tar file containing both .rpm and .deb packages + cd $(INTERMEDIATE_DIR); tar cvf $(OUTPUT_PACKAGE_PREFIX).tar $(OUTPUT_PACKAGE_PREFIX).rpm $(OUTPUT_PACKAGE_PREFIX).deb + + ../installer/bundle/create_bundle.sh $(PF)_$(PF_DISTRO) $(INTERMEDIATE_DIR) $(OUTPUT_PACKAGE_PREFIX) + # Copy the shell bundle to the target directory + $(MKPATH) $(TARGET_DIR) + cd $(INTERMEDIATE_DIR); $(COPY) `cat $(INTERMEDIATE_DIR)/package_filename`.sh $(TARGET_DIR) + +else + + @echo "========================= Performing Building RPM and DPKG packages" + sudo $(RMDIR) $(STAGING_DIR) + $(MKPATH) $(INTERMEDIATE_DIR) + python $(SCXPAL_DIR)/installer/InstallBuilder/installbuilder.py \ + --BASE_DIR=$(BASE_DIR) \ + --TARGET_DIR=$(INTERMEDIATE_DIR) \ + --INTERMEDIATE_DIR=$(INSTALLER_TMPDIR) \ + --STAGING_DIR=$(STAGING_DIR) \ + --BUILD_TYPE=$(BUILD_TYPE) \ + --BUILD_CONFIGURATION=$(BUILD_CONFIGURATION) \ + --PFARCH=$(PF_ARCH) \ + --PFDISTRO=$(PF_DISTRO) \ + --PFMAJOR=$(PF_MAJOR) \ + --PFMINOR=$(PF_MINOR) \ + --VERSION=$(CONTAINER_BUILDVERSION_MAJOR).$(CONTAINER_BUILDVERSION_MINOR).$(CONTAINER_BUILDVERSION_PATCH) \ + --RELEASE=$(CONTAINER_BUILDVERSION_BUILDNR) \ + --CONTAINER_BUILD_LIBRARY=$(CONTAINERLIB_FILENAME) \ + $(DPKG_LOCATION) \ + --OUTPUTFILE=$(OUTPUT_PACKAGE_PREFIX) \ + --DATAFILE_PATH=$(BASE_DIR)/installer/datafiles \ + base_container.data linux.data linux_dpkg.data + +endif diff --git a/installer/conf/out_oms.conf b/installer/conf/out_oms.conf new file mode 100644 index 000000000..d4b797757 --- /dev/null +++ b/installer/conf/out_oms.conf @@ -0,0 +1,6 @@ +omsadmin_conf_path=/etc/opt/microsoft/omsagent/conf/omsadmin.conf +cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt +key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key +container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname 
+container_inventory_refresh_interval=60 +kube_system_containers_refresh_interval=300 diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf new file mode 100644 index 000000000..cf490c077 --- /dev/null +++ b/installer/conf/td-agent-bit.conf @@ -0,0 +1,35 @@ +[SERVICE] + Flush 5 + Log_Level info + Parsers_File /etc/td-agent-bit/parsers.conf + Log_File /var/log/fluent-bit.log + +[INPUT] + Name tail + Tag oms.container.log.* + Path /var/log/containers/*.log + DB /var/log/fblogs.db + Parser docker + Mem_Buf_Limit 30m + Path_Key filepath + +[FILTER] + Name record_modifier + Match oms.container.log.* + Whitelist_key log + Whitelist_key stream + Whitelist_key time + Whitelist_key filepath + +[FILTER] + Name modify + Match oms.container.log.* + Rename log LogEntry + Rename stream LogEntrySource + Rename time LogEntryTimeStamp + Rename filepath Filepath + Add_if_not_present SourceSystem Containers + +[OUTPUT] + Name oms + Match oms.container.log.* \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index ec0728c01..85a128b2a 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -37,7 +37,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root - +/opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root +/etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -76,6 +78,9 @@ MAINTAINER: 'Microsoft Corporation' 
/var/opt/microsoft/docker-cimprov/state/ImageInventory; 755; root; root /var/opt/microsoft/docker-cimprov/log; 755; root; root +/opt/td-agent-bit; 755; root; root;sysdir +/opt/td-agent-bit/bin; 755; root; root;sysdir + %Dependencies %Postinstall_10 diff --git a/source/code/go/src/plugins/Makefile b/source/code/go/src/plugins/Makefile new file mode 100644 index 000000000..dfdc65d81 --- /dev/null +++ b/source/code/go/src/plugins/Makefile @@ -0,0 +1,20 @@ +GITVERSION := 0.1 +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + BUILDDATE := $(shell date --rfc-3339=seconds) +endif +ifeq ($(UNAME_S),Darwin) + BUILDDATE := $(shell gdate --rfc-3339=seconds) +endif + +fbplugin: + go build -ldflags "-X 'main.revision=$(GITVERSION)' -X 'main.builddate=$(BUILDDATE)'" -buildmode=c-shared -o out_oms.so . + +test: + go test -cover -race -coverprofile=coverage.txt -covermode=atomic + +glide: + glide install + +clean: + rm -rf *.so *.h *~ diff --git a/source/code/go/src/plugins/glide.lock b/source/code/go/src/plugins/glide.lock new file mode 100644 index 000000000..79745820b --- /dev/null +++ b/source/code/go/src/plugins/glide.lock @@ -0,0 +1,209 @@ +hash: a4b073d827b5cbb4a772dada9ff3bcf55c55afc3cda83ddec1e6edcdca8e219a +updated: 2018-09-06T04:07:01.808678175Z +imports: +- name: github.com/fluent/fluent-bit-go + version: c4a158a6e3a793166c6ecfa2d5c80d71eada8959 + subpackages: + - output +- name: github.com/ghodss/yaml + version: 73d445a93680fa1a78ae23a5839bad48f32ba1ee +- name: github.com/gogo/protobuf + version: c0656edd0d9eab7c66d1eb0c568f9039345796f7 + subpackages: + - proto + - sortkeys +- name: github.com/golang/glog + version: 44145f04b68cf362d9c4df2182967c2275eaefed +- name: github.com/golang/protobuf + version: b4deda0973fb4c70b50d226b1af49f3da59f5265 + subpackages: + - proto + - ptypes + - ptypes/any + - ptypes/duration + - ptypes/timestamp +- name: github.com/google/btree + version: 7d79101e329e5a3adf994758c578dab82b90c017 +- name: github.com/google/gofuzz + version: 
44d81051d367757e1c7c6a5a86423ece9afcf63c +- name: github.com/googleapis/gnostic + version: 0c5108395e2debce0d731cf0287ddf7242066aba + subpackages: + - OpenAPIv2 + - compiler + - extensions +- name: github.com/gregjones/httpcache + version: 787624de3eb7bd915c329cba748687a3b22666a6 + subpackages: + - diskcache +- name: github.com/json-iterator/go + version: f2b4162afba35581b6d4a50d3b8f34e33c144682 +- name: github.com/mitchellh/mapstructure + version: fa473d140ef3c6adf42d6b391fe76707f1f243c8 +- name: github.com/modern-go/concurrent + version: bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94 +- name: github.com/modern-go/reflect2 + version: 05fbef0ca5da472bbf96c9322b84a53edc03c9fd +- name: github.com/peterbourgon/diskv + version: 5f041e8faa004a95c88a202771f4cc3e991971e6 +- name: github.com/ugorji/go + version: 00b869d2f4a5e27445c2d916fa106fc72c106d4c + subpackages: + - codec +- name: golang.org/x/crypto + version: 49796115aa4b964c318aad4f3084fdb41e9aa067 + subpackages: + - ssh/terminal +- name: golang.org/x/net + version: 1c05540f6879653db88113bc4a2b70aec4bd491f + subpackages: + - context + - html + - html/atom + - http2 + - http2/hpack + - idna + - lex/httplex + - websocket +- name: golang.org/x/sys + version: 95c6576299259db960f6c5b9b69ea52422860fce + subpackages: + - unix + - windows +- name: golang.org/x/text + version: b19bf474d317b857955b12035d2c5acb57ce8b01 + subpackages: + - secure/bidirule + - transform + - unicode/bidi + - unicode/norm +- name: golang.org/x/time + version: f51c12702a4d776e4c1fa9b0fabab841babae631 + subpackages: + - rate +- name: gopkg.in/inf.v0 + version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 +- name: gopkg.in/yaml.v2 + version: 670d4cfef0544295bc27a114dbac37980d83185a +- name: k8s.io/api + version: 072894a440bdee3a891dea811fe42902311cd2a3 + subpackages: + - admissionregistration/v1alpha1 + - admissionregistration/v1beta1 + - apps/v1 + - apps/v1beta1 + - apps/v1beta2 + - authentication/v1 + - authentication/v1beta1 + - authorization/v1 + - 
authorization/v1beta1 + - autoscaling/v1 + - autoscaling/v2beta1 + - batch/v1 + - batch/v1beta1 + - batch/v2alpha1 + - certificates/v1beta1 + - core/v1 + - events/v1beta1 + - extensions/v1beta1 + - imagepolicy/v1alpha1 + - networking/v1 + - policy/v1beta1 + - rbac/v1 + - rbac/v1alpha1 + - rbac/v1beta1 + - scheduling/v1alpha1 + - scheduling/v1beta1 + - settings/v1alpha1 + - storage/v1 + - storage/v1alpha1 + - storage/v1beta1 +- name: k8s.io/apimachinery + version: 103fd098999dc9c0c88536f5c9ad2e5da39373ae + subpackages: + - pkg/api/errors + - pkg/api/meta + - pkg/api/resource + - pkg/apis/meta/v1 + - pkg/apis/meta/v1/unstructured + - pkg/apis/meta/v1beta1 + - pkg/conversion + - pkg/conversion/queryparams + - pkg/fields + - pkg/labels + - pkg/runtime + - pkg/runtime/schema + - pkg/runtime/serializer + - pkg/runtime/serializer/json + - pkg/runtime/serializer/protobuf + - pkg/runtime/serializer/recognizer + - pkg/runtime/serializer/streaming + - pkg/runtime/serializer/versioning + - pkg/selection + - pkg/types + - pkg/util/clock + - pkg/util/errors + - pkg/util/framer + - pkg/util/intstr + - pkg/util/json + - pkg/util/net + - pkg/util/runtime + - pkg/util/sets + - pkg/util/validation + - pkg/util/validation/field + - pkg/util/wait + - pkg/util/yaml + - pkg/version + - pkg/watch + - third_party/forked/golang/reflect +- name: k8s.io/client-go + version: 7d04d0e2a0a1a4d4a1cd6baa432a2301492e4e65 + subpackages: + - discovery + - kubernetes + - kubernetes/scheme + - kubernetes/typed/admissionregistration/v1alpha1 + - kubernetes/typed/admissionregistration/v1beta1 + - kubernetes/typed/apps/v1 + - kubernetes/typed/apps/v1beta1 + - kubernetes/typed/apps/v1beta2 + - kubernetes/typed/authentication/v1 + - kubernetes/typed/authentication/v1beta1 + - kubernetes/typed/authorization/v1 + - kubernetes/typed/authorization/v1beta1 + - kubernetes/typed/autoscaling/v1 + - kubernetes/typed/autoscaling/v2beta1 + - kubernetes/typed/batch/v1 + - kubernetes/typed/batch/v1beta1 + - 
kubernetes/typed/batch/v2alpha1 + - kubernetes/typed/certificates/v1beta1 + - kubernetes/typed/core/v1 + - kubernetes/typed/events/v1beta1 + - kubernetes/typed/extensions/v1beta1 + - kubernetes/typed/networking/v1 + - kubernetes/typed/policy/v1beta1 + - kubernetes/typed/rbac/v1 + - kubernetes/typed/rbac/v1alpha1 + - kubernetes/typed/rbac/v1beta1 + - kubernetes/typed/scheduling/v1alpha1 + - kubernetes/typed/scheduling/v1beta1 + - kubernetes/typed/settings/v1alpha1 + - kubernetes/typed/storage/v1 + - kubernetes/typed/storage/v1alpha1 + - kubernetes/typed/storage/v1beta1 + - pkg/apis/clientauthentication + - pkg/apis/clientauthentication/v1alpha1 + - pkg/apis/clientauthentication/v1beta1 + - pkg/version + - plugin/pkg/client/auth/exec + - rest + - rest/watch + - tools/clientcmd/api + - tools/metrics + - tools/reference + - transport + - util/cert + - util/connrotation + - util/flowcontrol + - util/integer +testImports: [] diff --git a/source/code/go/src/plugins/glide.yaml b/source/code/go/src/plugins/glide.yaml new file mode 100644 index 000000000..b986ece21 --- /dev/null +++ b/source/code/go/src/plugins/glide.yaml @@ -0,0 +1,15 @@ +package: plugins +import: +- package: github.com/fluent/fluent-bit-go + subpackages: + - output +- package: github.com/mitchellh/mapstructure + version: ^1.0.0 +- package: k8s.io/apimachinery + subpackages: + - pkg/apis/meta/v1 +- package: k8s.io/client-go + version: ^8.0.0 + subpackages: + - kubernetes + - rest diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go new file mode 100644 index 000000000..49472c74b --- /dev/null +++ b/source/code/go/src/plugins/oms.go @@ -0,0 +1,359 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/fluent/fluent-bit-go/output" + "github.com/mitchellh/mapstructure" + lumberjack "gopkg.in/natefinch/lumberjack.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + 
"k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// DataType for Container Log +const DataType = "CONTAINER_LOG_BLOB" + +// IPName for Container Log +const IPName = "Containers" +const containerInventoryPath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory" +const defaultContainerInventoryRefreshInterval = 60 +const defaultKubeSystemContainersRefreshInterval = 300 + +var ( + // PluginConfiguration the plugins configuration + PluginConfiguration map[string]string + // HTTPClient for making POST requests to OMSEndpoint + HTTPClient http.Client + // OMSEndpoint ingestion endpoint + OMSEndpoint string + // Computer (Hostname) when ingesting into ContainerLog table + Computer string +) + +var ( + // ImageIDMap caches the container id to image mapping + ImageIDMap map[string]string + // NameIDMap caches the container it to Name mapping + NameIDMap map[string]string + // IgnoreIDSet set of container Ids of kube-system pods + IgnoreIDSet map[string]bool + + // DataUpdateMutex read and write mutex access to the container id set + DataUpdateMutex = &sync.Mutex{} +) + +var ( + // FLBLogger stream + FLBLogger = createLogger() + + // Log wrapper function + Log = FLBLogger.Printf +) + +// ContainerInventory represents the container info +type ContainerInventory struct { + ElementName string `json:"ElementName"` + CreatedTime string `json:"CreatedTime"` + State string `json:"State"` + ExitCode int `json:"ExitCode"` + StartedTime string `json:"StartedTime"` + FinishedTime string `json:"FinishedTime"` + ImageID string `json:"ImageId"` + Image string `json:"Image"` + Repository string `json:"Repository"` + ImageTag string `json:"ImageTag"` + ComposeGroup string `json:"ComposeGroup"` + ContainerHostname string `json:"ContainerHostname"` + Computer string `json:"Computer"` + Command string `json:"Command"` + EnvironmentVar string `json:"EnvironmentVar"` + Ports string `json:"Ports"` + Links string `json:"Links"` +} + +// DataItem represents the object 
corresponding to the json that is sent by fluentbit tail plugin +type DataItem struct { + LogEntry string `json:"LogEntry"` + LogEntrySource string `json:"LogEntrySource"` + LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + ID string `json:"Id"` + Image string `json:"Image"` + Name string `json:"Name"` + SourceSystem string `json:"SourceSystem"` + Computer string `json:"Computer"` + Filepath string `json:"Filepath"` +} + +// ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point +type ContainerLogBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []DataItem `json:"DataItems"` +} + +func populateMaps() { + + Log("Updating ImageIDMap and NameIDMap") + + _imageIDMap := make(map[string]string) + _nameIDMap := make(map[string]string) + files, err := ioutil.ReadDir(containerInventoryPath) + + if err != nil { + Log("error when reading container inventory %s\n", err.Error()) + } + + for _, file := range files { + fullPath := fmt.Sprintf("%s/%s", containerInventoryPath, file.Name()) + fileContent, err := ioutil.ReadFile(fullPath) + if err != nil { + Log("Error reading file content %s", fullPath) + Log(err.Error()) + } + var containerInventory ContainerInventory + unmarshallErr := json.Unmarshal(fileContent, &containerInventory) + + if unmarshallErr != nil { + Log("Unmarshall error when reading file %s %s \n", fullPath, unmarshallErr.Error()) + } + + _imageIDMap[file.Name()] = containerInventory.Image + _nameIDMap[file.Name()] = containerInventory.ElementName + } + Log("Locking to update image and name maps") + DataUpdateMutex.Lock() + ImageIDMap = _imageIDMap + NameIDMap = _nameIDMap + DataUpdateMutex.Unlock() + Log("Unlocking after updating image and name maps") +} + +func createLogger() *log.Logger { + + var logfile *os.File + path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" + if _, err := os.Stat(path); err == nil { + fmt.Printf("File Exists. 
Opening file in append mode...\n") + logfile, err = os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0600) + if err != nil { + fmt.Printf(err.Error()) + } + } + + if _, err := os.Stat(path); os.IsNotExist(err) { + fmt.Printf("File Doesnt Exist. Creating file...\n") + logfile, err = os.Create(path) + if err != nil { + fmt.Printf(err.Error()) + } + } + + logger := log.New(logfile, "", 0) + + logger.SetOutput(&lumberjack.Logger{ + Filename: path, + MaxSize: 10, //megabytes + MaxBackups: 3, + MaxAge: 28, //days + Compress: true, // false by default + }) + + logger.SetFlags(log.Ltime | log.Lshortfile | log.LstdFlags) + return logger +} + +func updateContainersData() { + + containerInventoryRefreshInterval, err := strconv.Atoi(PluginConfiguration["container_inventory_refresh_interval"]) + if err != nil { + Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) + containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval + } + Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) + go initMaps(containerInventoryRefreshInterval) + + kubeSystemContainersRefreshInterval, err := strconv.Atoi(PluginConfiguration["kube_system_containers_refresh_interval"]) + if err != nil { + Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + kubeSystemContainersRefreshInterval = defaultKubeSystemContainersRefreshInterval + } + Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) + + go updateIgnoreContainerIds(kubeSystemContainersRefreshInterval) +} + +func initMaps(refreshInterval int) { + ImageIDMap = make(map[string]string) + NameIDMap = make(map[string]string) + + populateMaps() + + for range time.Tick(time.Second * time.Duration(refreshInterval)) { + populateMaps() + } +} + +func updateIgnoreContainerIds(refreshInterval int) { + IgnoreIDSet = make(map[string]bool) + + updateKubeSystemContainerIDs() + + for range time.Tick(time.Second * 
time.Duration(refreshInterval)) { + updateKubeSystemContainerIDs() + } +} + +func updateKubeSystemContainerIDs() { + + if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { + Log("Kube System Log Collection is ENABLED.") + return + } + + Log("Kube System Log Collection is DISABLED. Collecting containerIds to drop their records") + config, err := rest.InClusterConfig() + if err != nil { + Log("Error getting config %s\n", err.Error()) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + Log("Error getting clientset %s", err.Error()) + } + + pods, err := clientset.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) + if err != nil { + Log("Error getting pods %s\n", err.Error()) + } + + _ignoreIDSet := make(map[string]bool) + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + } + } + + Log("Locking to update kube-system container IDs") + DataUpdateMutex.Lock() + IgnoreIDSet = _ignoreIDSet + DataUpdateMutex.Unlock() + Log("Unlocking after updating kube-system container IDs") +} + +// PostDataHelper sends data to the OMS endpoint +func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { + + start := time.Now() + var dataItems []DataItem + DataUpdateMutex.Lock() + + for _, record := range tailPluginRecords { + + containerID := getContainerIDFromFilePath(toString(record["Filepath"])) + + if containsKey(IgnoreIDSet, containerID) { + continue + } + + var dataItem DataItem + stringMap := make(map[string]string) + + // convert map[interface{}]interface{} to map[string]string + for key, value := range record { + strKey := fmt.Sprintf("%v", key) + strValue := toString(value) + stringMap[strKey] = strValue + } + + stringMap["Id"] = containerID + stringMap["Image"] = ImageIDMap[containerID] + stringMap["Name"] = 
NameIDMap[containerID] + stringMap["Computer"] = Computer + mapstructure.Decode(stringMap, &dataItem) + dataItems = append(dataItems, dataItem) + } + DataUpdateMutex.Unlock() + + if len(dataItems) > 0 { + logEntry := ContainerLogBlob{ + DataType: DataType, + IPName: IPName, + DataItems: dataItems} + + marshalled, err := json.Marshal(logEntry) + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + Log("Error when sending request %s \n", err.Error()) + Log("Failed to flush %d records after %s", len(dataItems), elapsed) + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + return output.FLB_RETRY + } + + Log("Successfully flushed %d records in %s", len(dataItems), elapsed) + } + + return output.FLB_OK +} + +func containsKey(currentMap map[string]bool, key string) bool { + _, c := currentMap[key] + return c +} + +func toString(s interface{}) string { + value := s.([]uint8) + return string([]byte(value[:])) +} + +func getContainerIDFromFilePath(filepath string) string { + start := strings.LastIndex(filepath, "-") + end := strings.LastIndex(filepath, ".") + return filepath[start+1 : end] +} + +// ReadConfig reads and populates plugin configuration +func ReadConfig(pluginConfPath string) map[string]string { + + pluginConf, err := ReadConfiguration(pluginConfPath) + omsadminConf, err := ReadConfiguration(pluginConf["omsadmin_conf_path"]) + + if err != nil { + Log(err.Error()) + } + + containerHostName, err := ioutil.ReadFile(pluginConf["container_host_file_path"]) + if err != nil { + Log("Error when reading containerHostName file %s", err.Error()) + } + + Computer = strings.TrimSuffix(toString(containerHostName), "\n") + Log("Computer == %s \n", Computer) + + OMSEndpoint = omsadminConf["OMS_ENDPOINT"] 
+ Log("OMSEndpoint %s", OMSEndpoint) + + return pluginConf +} diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go new file mode 100644 index 000000000..dad0ede81 --- /dev/null +++ b/source/code/go/src/plugins/out_oms.go @@ -0,0 +1,57 @@ +package main + +import ( + "github.com/fluent/fluent-bit-go/output" +) +import ( + "C" + "unsafe" +) + +//export FLBPluginRegister +func FLBPluginRegister(ctx unsafe.Pointer) int { + return output.FLBPluginRegister(ctx, "oms", "Stdout GO!") +} + +//export FLBPluginInit +// (fluentbit will call this) +// ctx (context) pointer to fluentbit context (state/ c code) +func FLBPluginInit(ctx unsafe.Pointer) int { + Log("Initializing out_oms go plugin for fluentbit") + PluginConfiguration = ReadConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") + CreateHTTPClient() + updateContainersData() + return output.FLB_OK +} + +//export FLBPluginFlush +func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { + var count int + var ret int + var record map[interface{}]interface{} + var records []map[interface{}]interface{} + + // Create Fluent Bit decoder + dec := output.NewDecoder(data, int(length)) + + // Iterate Records + count = 0 + for { + // Extract Record + ret, _, record = output.GetRecord(dec) + if ret != 0 { + break + } + records = append(records, record) + count++ + } + return PostDataHelper(records) +} + +// FLBPluginExit exits the plugin +func FLBPluginExit() int { + return output.FLB_OK +} + +func main() { +} diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go new file mode 100644 index 000000000..0e33f43f9 --- /dev/null +++ b/source/code/go/src/plugins/utils.go @@ -0,0 +1,67 @@ +package main + +import ( + "bufio" + "crypto/tls" + "log" + "net/http" + "os" + "strings" +) + +// ReadConfiguration reads a property file +func ReadConfiguration(filename string) (map[string]string, error) { + config := map[string]string{} + + if len(filename) == 0 
{ + return config, nil + } + + file, err := os.Open(filename) + if err != nil { + log.Fatal(err) + return nil, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + currentLine := scanner.Text() + if equalIndex := strings.Index(currentLine, "="); equalIndex >= 0 { + if key := strings.TrimSpace(currentLine[:equalIndex]); len(key) > 0 { + value := "" + if len(currentLine) > equalIndex { + value = strings.TrimSpace(currentLine[equalIndex+1:]) + } + config[key] = value + } + } + } + + if err := scanner.Err(); err != nil { + log.Fatal(err) + return nil, err + } + + return config, nil +} + +// CreateHTTPClient used to create the client for sending post requests to OMSEndpoint +func CreateHTTPClient() { + + cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) + if err != nil { + Log("Error when loading cert %s", err.Error()) + } + + tlsConfig := &tls.Config{ + Certificates: []tls.Certificate{cert}, + } + + tlsConfig.BuildNameToCertificate() + transport := &http.Transport{TLSClientConfig: tlsConfig} + + HTTPClient = http.Client{Transport: transport} + + Log("Successfully created HTTP Client") +} From b02f2ec57e47c68648596ef7487bf320fa5e9331 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 14 Sep 2018 11:24:12 -0700 Subject: [PATCH 07/88] Dilipr/glide updates (#127) * Updating glide.* files to include lumberjack --- source/code/go/src/plugins/glide.lock | 6 ++++-- source/code/go/src/plugins/glide.yaml | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/glide.lock b/source/code/go/src/plugins/glide.lock index 79745820b..4597b594a 100644 --- a/source/code/go/src/plugins/glide.lock +++ b/source/code/go/src/plugins/glide.lock @@ -1,5 +1,5 @@ -hash: a4b073d827b5cbb4a772dada9ff3bcf55c55afc3cda83ddec1e6edcdca8e219a -updated: 2018-09-06T04:07:01.808678175Z +hash: bb32415f402ab29751f29b8e394bc974cbc31861453d817aaeb94ef83dacc488 
+updated: 2018-09-14T18:14:28.748047598Z imports: - name: github.com/fluent/fluent-bit-go version: c4a158a6e3a793166c6ecfa2d5c80d71eada8959 @@ -83,6 +83,8 @@ imports: - rate - name: gopkg.in/inf.v0 version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 +- name: gopkg.in/natefinch/lumberjack.v2 + version: a96e63847dc3c67d17befa69c303767e2f84e54f - name: gopkg.in/yaml.v2 version: 670d4cfef0544295bc27a114dbac37980d83185a - name: k8s.io/api diff --git a/source/code/go/src/plugins/glide.yaml b/source/code/go/src/plugins/glide.yaml index b986ece21..403e1efc4 100644 --- a/source/code/go/src/plugins/glide.yaml +++ b/source/code/go/src/plugins/glide.yaml @@ -5,6 +5,8 @@ import: - output - package: github.com/mitchellh/mapstructure version: ^1.0.0 +- package: gopkg.in/natefinch/lumberjack.v2 + version: ^2.1.0 - package: k8s.io/apimachinery subpackages: - pkg/apis/meta/v1 From e01c67845cd5d99f77b8dafd3e579d933984c3af Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Mon, 17 Sep 2018 15:42:01 -0700 Subject: [PATCH 08/88] containerID="" for pull issues --- source/code/plugin/in_kube_podinventory.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index f478705f6..2cd1e1bc3 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -143,7 +143,8 @@ def parse_and_emit_records(podInventory, serviceList) if !container['containerID'].nil? record['ContainerID'] = container['containerID'].split("//")[1] else - record['ContainerID'] = "00000000-0000-0000-0000-000000000000" + # for containers that have image issues (like invalid image/tag etc..) this will be empty. 
do not make it all 0 + record['ContainerID'] = "" end #keeping this as which is same as InstanceName in perf table record['ContainerName'] = podUid + "/" +container['name'] From b0ba22deaf43c29058d61f0dd76c2c64c34f5ac4 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 18 Sep 2018 16:59:46 -0700 Subject: [PATCH 09/88] Using KubeAPI for getting image,name. Adding more logs (#129) * Using KubeAPI for getting image,name. Adding more logs * Moving log file and state file to within the omsagent container * Changing log and state paths --- installer/conf/td-agent-bit.conf | 4 +- source/code/go/src/plugins/oms.go | 105 +++++++++++++------------- source/code/go/src/plugins/out_oms.go | 2 +- 3 files changed, 54 insertions(+), 57 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index cf490c077..84a9fcf94 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -2,13 +2,13 @@ Flush 5 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf - Log_File /var/log/fluent-bit.log + Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log [INPUT] Name tail Tag oms.container.log.* Path /var/log/containers/*.log - DB /var/log/fblogs.db + DB /var/opt/microsoft/docker-cimprov/state/fblogs.db Parser docker Mem_Buf_Limit 30m Path_Key filepath diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 49472c74b..c18135dcc 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -26,7 +26,6 @@ const DataType = "CONTAINER_LOG_BLOB" // IPName for Container Log const IPName = "Containers" -const containerInventoryPath = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory" const defaultContainerInventoryRefreshInterval = 60 const defaultKubeSystemContainersRefreshInterval = 300 @@ -51,6 +50,9 @@ var ( // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} + + // ClientSet for querying 
KubeAPIs + ClientSet *kubernetes.Clientset ) var ( @@ -61,27 +63,6 @@ var ( Log = FLBLogger.Printf ) -// ContainerInventory represents the container info -type ContainerInventory struct { - ElementName string `json:"ElementName"` - CreatedTime string `json:"CreatedTime"` - State string `json:"State"` - ExitCode int `json:"ExitCode"` - StartedTime string `json:"StartedTime"` - FinishedTime string `json:"FinishedTime"` - ImageID string `json:"ImageId"` - Image string `json:"Image"` - Repository string `json:"Repository"` - ImageTag string `json:"ImageTag"` - ComposeGroup string `json:"ComposeGroup"` - ContainerHostname string `json:"ContainerHostname"` - Computer string `json:"Computer"` - Command string `json:"Command"` - EnvironmentVar string `json:"EnvironmentVar"` - Ports string `json:"Ports"` - Links string `json:"Links"` -} - // DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type DataItem struct { LogEntry string `json:"LogEntry"` @@ -108,29 +89,25 @@ func populateMaps() { _imageIDMap := make(map[string]string) _nameIDMap := make(map[string]string) - files, err := ioutil.ReadDir(containerInventoryPath) + pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) if err != nil { - Log("error when reading container inventory %s\n", err.Error()) + Log("Error getting pods %s\n", err.Error()) } - for _, file := range files { - fullPath := fmt.Sprintf("%s/%s", containerInventoryPath, file.Name()) - fileContent, err := ioutil.ReadFile(fullPath) - if err != nil { - Log("Error reading file content %s", fullPath) - Log(err.Error()) - } - var containerInventory ContainerInventory - unmarshallErr := json.Unmarshal(fileContent, &containerInventory) - - if unmarshallErr != nil { - Log("Unmarshall error when reading file %s %s \n", fullPath, unmarshallErr.Error()) + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + 
containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] + image := status.Image + name := fmt.Sprintf("%s/%s", pod.UID, status.Name) + if containerID != "" { + _imageIDMap[containerID] = image + _nameIDMap[containerID] = name + } } - - _imageIDMap[file.Name()] = containerInventory.Image - _nameIDMap[file.Name()] = containerInventory.ElementName } + Log("Locking to update image and name maps") DataUpdateMutex.Lock() ImageIDMap = _imageIDMap @@ -164,7 +141,7 @@ func createLogger() *log.Logger { logger.SetOutput(&lumberjack.Logger{ Filename: path, MaxSize: 10, //megabytes - MaxBackups: 3, + MaxBackups: 1, MaxAge: 28, //days Compress: true, // false by default }) @@ -222,17 +199,8 @@ func updateKubeSystemContainerIDs() { } Log("Kube System Log Collection is DISABLED. Collecting containerIds to drop their records") - config, err := rest.InClusterConfig() - if err != nil { - Log("Error getting config %s\n", err.Error()) - } - clientset, err := kubernetes.NewForConfig(config) - if err != nil { - Log("Error getting clientset %s", err.Error()) - } - - pods, err := clientset.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) + pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err != nil { Log("Error getting pods %s\n", err.Error()) } @@ -278,8 +246,27 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } stringMap["Id"] = containerID - stringMap["Image"] = ImageIDMap[containerID] - stringMap["Name"] = NameIDMap[containerID] + + if val, ok := ImageIDMap[containerID]; ok { + stringMap["Image"] = val + } else { + Log("ContainerId %s not present in Map ", containerID) + Log("CurrentMap Snapshot \n") + for k, v := range ImageIDMap { + Log("%s ==> %s", k, v) + } + } + + if val, ok := NameIDMap[containerID]; ok { + stringMap["Name"] = val + } else { + Log("ContainerId %s not present in Map ", containerID) + Log("CurrentMap Snapshot \n") + for k, v := range NameIDMap { + Log("%s ==> %s", k, v) + } + 
} + stringMap["Computer"] = Computer mapstructure.Decode(stringMap, &dataItem) dataItems = append(dataItems, dataItem) @@ -334,8 +321,8 @@ func getContainerIDFromFilePath(filepath string) string { return filepath[start+1 : end] } -// ReadConfig reads and populates plugin configuration -func ReadConfig(pluginConfPath string) map[string]string { +// InitializeConfig reads and populates plugin configuration +func InitializeConfig(pluginConfPath string) map[string]string { pluginConf, err := ReadConfiguration(pluginConfPath) omsadminConf, err := ReadConfiguration(pluginConf["omsadmin_conf_path"]) @@ -355,5 +342,15 @@ func ReadConfig(pluginConfPath string) map[string]string { OMSEndpoint = omsadminConf["OMS_ENDPOINT"] Log("OMSEndpoint %s", OMSEndpoint) + config, err := rest.InClusterConfig() + if err != nil { + Log("Error getting config %s\n", err.Error()) + } + + ClientSet, err = kubernetes.NewForConfig(config) + if err != nil { + Log("Error getting clientset %s", err.Error()) + } + return pluginConf } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index dad0ede81..8c23f47a8 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -18,7 +18,7 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - PluginConfiguration = ReadConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") + PluginConfiguration = InitializeConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") CreateHTTPClient() updateContainersData() return output.FLB_OK From 97834199721172ba0a67828b19a6f26de1a4b0a0 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 27 Sep 2018 14:35:29 -0700 Subject: [PATCH 10/88] Dilipr/mark comments (#130) * Marks Comments + Error Handling * Drop records from files that are not in k8s format * Remove unnecessary log line' * 
Adding Log to the file that doesn't conform to the expected format --- source/code/go/src/plugins/oms.go | 227 ++++++++++++++------------ source/code/go/src/plugins/out_oms.go | 6 +- source/code/go/src/plugins/utils.go | 1 + 3 files changed, 123 insertions(+), 111 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c18135dcc..2e9e2f3d0 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -12,7 +12,8 @@ import ( "strings" "sync" "time" - +) +import ( "github.com/fluent/fluent-bit-go/output" "github.com/mitchellh/mapstructure" lumberjack "gopkg.in/natefinch/lumberjack.v2" @@ -24,6 +25,9 @@ import ( // DataType for Container Log const DataType = "CONTAINER_LOG_BLOB" +// ContainerLogPluginConfFilePath --> config file path for container log plugin +const ContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" + // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 @@ -47,18 +51,22 @@ var ( NameIDMap map[string]string // IgnoreIDSet set of container Ids of kube-system pods IgnoreIDSet map[string]bool - // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} - // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset ) +var ( + // KubeSystemContainersRefreshTicker updates the kube-system containers + KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * 300) + // ContainerImageNameRefreshTicker updates the container image and names periodically + ContainerImageNameRefreshTicker = time.NewTicker(time.Second * 60) +) + var ( // FLBLogger stream FLBLogger = createLogger() - // Log wrapper function Log = FLBLogger.Printf ) @@ -83,41 +91,7 @@ type ContainerLogBlob struct { DataItems []DataItem `json:"DataItems"` } -func populateMaps() { - - Log("Updating ImageIDMap and NameIDMap") - - _imageIDMap := make(map[string]string) - _nameIDMap := 
make(map[string]string) - - pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) - if err != nil { - Log("Error getting pods %s\n", err.Error()) - } - - for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { - lastSlashIndex := strings.LastIndex(status.ContainerID, "/") - containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] - image := status.Image - name := fmt.Sprintf("%s/%s", pod.UID, status.Name) - if containerID != "" { - _imageIDMap[containerID] = image - _nameIDMap[containerID] = name - } - } - } - - Log("Locking to update image and name maps") - DataUpdateMutex.Lock() - ImageIDMap = _imageIDMap - NameIDMap = _nameIDMap - DataUpdateMutex.Unlock() - Log("Unlocking after updating image and name maps") -} - func createLogger() *log.Logger { - var logfile *os.File path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" if _, err := os.Stat(path); err == nil { @@ -150,88 +124,85 @@ func createLogger() *log.Logger { return logger } -func updateContainersData() { +func updateContainerImageNameMaps() { + for ; true; <-ContainerImageNameRefreshTicker.C { + Log("Updating ImageIDMap and NameIDMap") - containerInventoryRefreshInterval, err := strconv.Atoi(PluginConfiguration["container_inventory_refresh_interval"]) - if err != nil { - Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) - containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval - } - Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) - go initMaps(containerInventoryRefreshInterval) + _imageIDMap := make(map[string]string) + _nameIDMap := make(map[string]string) - kubeSystemContainersRefreshInterval, err := strconv.Atoi(PluginConfiguration["kube_system_containers_refresh_interval"]) - if err != nil { - Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) - kubeSystemContainersRefreshInterval = 
defaultKubeSystemContainersRefreshInterval - } - Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) - - go updateIgnoreContainerIds(kubeSystemContainersRefreshInterval) -} - -func initMaps(refreshInterval int) { - ImageIDMap = make(map[string]string) - NameIDMap = make(map[string]string) - - populateMaps() - - for range time.Tick(time.Second * time.Duration(refreshInterval)) { - populateMaps() - } -} - -func updateIgnoreContainerIds(refreshInterval int) { - IgnoreIDSet = make(map[string]bool) + pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) + if err != nil { + Log("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + } - updateKubeSystemContainerIDs() + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] + image := status.Image + name := fmt.Sprintf("%s/%s", pod.UID, status.Name) + if containerID != "" { + _imageIDMap[containerID] = image + _nameIDMap[containerID] = name + } + } + } - for range time.Tick(time.Second * time.Duration(refreshInterval)) { - updateKubeSystemContainerIDs() + Log("Locking to update image and name maps") + DataUpdateMutex.Lock() + ImageIDMap = _imageIDMap + NameIDMap = _nameIDMap + DataUpdateMutex.Unlock() + Log("Unlocking after updating image and name maps") } } func updateKubeSystemContainerIDs() { + for ; true; <-KubeSystemContainersRefreshTicker.C { + if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { + Log("Kube System Log Collection is ENABLED.") + return + } - if strings.Compare(os.Getenv("DISABLE_KUBE_SYSTEM_LOG_COLLECTION"), "true") != 0 { - Log("Kube System Log Collection is ENABLED.") - return - } - - Log("Kube System Log Collection is DISABLED. 
Collecting containerIds to drop their records") + Log("Kube System Log Collection is DISABLED. Collecting containerIds to drop their records") - pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) - if err != nil { - Log("Error getting pods %s\n", err.Error()) - } + pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) + if err != nil { + Log("Error getting pods %s\nIt is ok to log here and continue. Kube-system logs will be collected", err.Error()) + } - _ignoreIDSet := make(map[string]bool) - for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { - lastSlashIndex := strings.LastIndex(status.ContainerID, "/") - _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + _ignoreIDSet := make(map[string]bool) + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + lastSlashIndex := strings.LastIndex(status.ContainerID, "/") + _ignoreIDSet[status.ContainerID[lastSlashIndex+1:len(status.ContainerID)]] = true + } } - } - Log("Locking to update kube-system container IDs") - DataUpdateMutex.Lock() - IgnoreIDSet = _ignoreIDSet - DataUpdateMutex.Unlock() - Log("Unlocking after updating kube-system container IDs") + Log("Locking to update kube-system container IDs") + DataUpdateMutex.Lock() + IgnoreIDSet = _ignoreIDSet + DataUpdateMutex.Unlock() + Log("Unlocking after updating kube-system container IDs") + } } // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { + defer DataUpdateMutex.Unlock() + start := time.Now() var dataItems []DataItem DataUpdateMutex.Lock() for _, record := range tailPluginRecords { - containerID := getContainerIDFromFilePath(toString(record["Filepath"])) + filepath := toString(record["Filepath"]) + containerID := getContainerIDFromFilePath(filepath) - if containsKey(IgnoreIDSet, containerID) { + if containerID == "" || 
containsKey(IgnoreIDSet, containerID) { continue } @@ -271,7 +242,6 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { mapstructure.Decode(stringMap, &dataItem) dataItems = append(dataItems, dataItem) } - DataUpdateMutex.Unlock() if len(dataItems) > 0 { logEntry := ContainerLogBlob{ @@ -318,39 +288,80 @@ func toString(s interface{}) string { func getContainerIDFromFilePath(filepath string) string { start := strings.LastIndex(filepath, "-") end := strings.LastIndex(filepath, ".") + if start >= end || start == -1 || end == -1 { + // This means the file is not a managed Kubernetes docker log file. + // Drop all records from the file + Log("File %s is not a Kubernetes managed docker log file. Dropping all records from the file", filepath) + return "" + } return filepath[start+1 : end] } -// InitializeConfig reads and populates plugin configuration -func InitializeConfig(pluginConfPath string) map[string]string { +// InitializePlugin reads and populates plugin configuration +func InitializePlugin(pluginConfPath string) { + + IgnoreIDSet = make(map[string]bool) + ImageIDMap = make(map[string]string) + NameIDMap = make(map[string]string) - pluginConf, err := ReadConfiguration(pluginConfPath) - omsadminConf, err := ReadConfiguration(pluginConf["omsadmin_conf_path"]) + pluginConfig, err := ReadConfiguration(pluginConfPath) + if err != nil { + Log("Error Reading plugin config path : %s \n", err.Error()) + log.Fatalf("Error Reading plugin config path : %s \n", err.Error()) + } + omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) if err != nil { Log(err.Error()) + log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) } + OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + Log("OMSEndpoint %s", OMSEndpoint) - containerHostName, err := ioutil.ReadFile(pluginConf["container_host_file_path"]) + // Initialize image,name map refresh ticker + containerInventoryRefreshInterval, err := 
strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { - Log("Error when reading containerHostName file %s", err.Error()) + Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) + Log("Using Default Refresh Interval of %d s\n", defaultContainerInventoryRefreshInterval) + containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval } + Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) + ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) + // Initialize Kube System Refresh Ticker + kubeSystemContainersRefreshInterval, err := strconv.Atoi(pluginConfig["kube_system_containers_refresh_interval"]) + if err != nil { + Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + Log("Using Default Refresh Interval of %d s\n", defaultKubeSystemContainersRefreshInterval) + kubeSystemContainersRefreshInterval = defaultKubeSystemContainersRefreshInterval + } + Log("kubeSystemContainersRefreshInterval = %d \n", kubeSystemContainersRefreshInterval) + KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * time.Duration(kubeSystemContainersRefreshInterval)) + + // Populate Computer field + containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) + if err != nil { + // It is ok to log here and continue, because only the Computer column will be missing, + // which can be deduced from a combination of containerId, and docker logs on the node + Log("Error when reading containerHostName file %s.\n It is ok to log here and continue, because only the Computer column will be missing, which can be deduced from a combination of containerId, and docker logs on the nodes\n", err.Error()) + } Computer = strings.TrimSuffix(toString(containerHostName), "\n") Log("Computer == %s \n", Computer) - OMSEndpoint = omsadminConf["OMS_ENDPOINT"] - Log("OMSEndpoint %s", OMSEndpoint) - + // 
Initialize KubeAPI Client config, err := rest.InClusterConfig() if err != nil { - Log("Error getting config %s\n", err.Error()) + Log("Error getting config %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) } ClientSet, err = kubernetes.NewForConfig(config) if err != nil { - Log("Error getting clientset %s", err.Error()) + Log("Error getting clientset %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) } - return pluginConf + PluginConfiguration = pluginConfig + + CreateHTTPClient() + go updateKubeSystemContainerIDs() + go updateContainerImageNameMaps() } diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 8c23f47a8..ec9a573d1 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -18,9 +18,7 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - PluginConfiguration = InitializeConfig("/etc/opt/microsoft/docker-cimprov/out_oms.conf") - CreateHTTPClient() - updateContainersData() + InitializePlugin(ContainerLogPluginConfFilePath) return output.FLB_OK } @@ -50,6 +48,8 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { + KubeSystemContainersRefreshTicker.Stop() + ContainerImageNameRefreshTicker.Stop() return output.FLB_OK } diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 0e33f43f9..1ac9b05a9 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -52,6 +52,7 @@ func CreateHTTPClient() { cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], 
PluginConfiguration["key_file_path"]) if err != nil { Log("Error when loading cert %s", err.Error()) + log.Fatalf("Error when loading cert %s", err.Error()) } tlsConfig := &tls.Config{ From 8e35b7365bab9de6d087718887d5021167617a0d Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 27 Sep 2018 15:52:13 -0700 Subject: [PATCH 11/88] Rashmi/segfault latest (#132) * adding null checks in all providers * fixing type * fixing type * adding more null checks * update cjson --- source/code/cjson/cJSON.c | 3478 +++++++++++++---- source/code/cjson/cJSON.h | 398 +- ...iner_ContainerInventory_Class_Provider.cpp | 34 +- ...ner_ContainerStatistics_Class_Provider.cpp | 39 +- .../Container_DaemonEvent_Class_Provider.cpp | 6 +- ...ontainer_ImageInventory_Class_Provider.cpp | 19 +- .../Container_Process_Class_Provider.cpp | 2 +- 7 files changed, 3146 insertions(+), 830 deletions(-) diff --git a/source/code/cjson/cJSON.c b/source/code/cjson/cJSON.c index 77dbfe959..c561c7ceb 100755 --- a/source/code/cjson/cJSON.c +++ b/source/code/cjson/cJSON.c @@ -1,770 +1,2930 @@ /* - Copyright (c) 2009 Dave Gamble - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. +Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. */ /* cJSON */ /* JSON parser in C. 
*/ +/* disable warnings about old C89 functions in MSVC */ +#if !defined(_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) +#define _CRT_SECURE_NO_DEPRECATE +#endif + +#ifdef __GNUC__ +#pragma GCC visibility push(default) +#endif +#if defined(_MSC_VER) +#pragma warning (push) +/* disable warning about single line comments in system headers */ +#pragma warning (disable : 4001) +#endif + #include #include #include #include -#include #include #include + +#ifdef ENABLE_LOCALES +#include +#endif + +#if defined(_MSC_VER) +#pragma warning (pop) +#endif +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + #include "cJSON.h" -static const char *ep; -const char *cJSON_GetErrorPtr(void) {return ep;} +/* define our own boolean type */ +#define true ((cJSON_bool)1) +#define false ((cJSON_bool)0) -static int cJSON_strcasecmp(const char *s1,const char *s2) +typedef struct { + const unsigned char *json; + size_t position; +} error; +static error global_error = { NULL, 0 }; + +CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void) { - if (!s1) return (s1==s2)?0:1;if (!s2) return 1; - for(; tolower(*s1) == tolower(*s2); ++s1, ++s2) if(*s1 == 0) return 0; - return tolower(*(const unsigned char *)s1) - tolower(*(const unsigned char *)s2); + return (const char*)(global_error.json + global_error.position); } -static void *(*cJSON_malloc)(size_t sz) = malloc; -static void (*cJSON_free)(void *ptr) = free; +CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item) { + if (!cJSON_IsString(item)) { + return NULL; + } + + return item->valuestring; +} -static char* cJSON_strdup(const char* str) +/* This is a safeguard to prevent copy-pasters from using incompatible C and header files */ +#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || (CJSON_VERSION_PATCH != 8) +#error cJSON.h and cJSON.c have different versions. Make sure that both have the same. 
+#endif + +CJSON_PUBLIC(const char*) cJSON_Version(void) { - size_t len; - char* copy; + static char version[15]; + sprintf(version, "%i.%i.%i", CJSON_VERSION_MAJOR, CJSON_VERSION_MINOR, CJSON_VERSION_PATCH); - len = strlen(str) + 1; - if (!(copy = (char*)cJSON_malloc(len))) return 0; - memcpy(copy,str,len); - return copy; + return version; } -void cJSON_InitHooks(cJSON_Hooks* hooks) +/* Case insensitive string comparison, doesn't consider two NULL pointers equal though */ +static int case_insensitive_strcmp(const unsigned char *string1, const unsigned char *string2) { - if (!hooks) { /* Reset hooks */ - cJSON_malloc = malloc; - cJSON_free = free; - return; - } + if ((string1 == NULL) || (string2 == NULL)) + { + return 1; + } + + if (string1 == string2) + { + return 0; + } + + for (; tolower(*string1) == tolower(*string2); (void)string1++, string2++) + { + if (*string1 == '\0') + { + return 0; + } + } + + return tolower(*string1) - tolower(*string2); +} - cJSON_malloc = (hooks->malloc_fn)?hooks->malloc_fn:malloc; - cJSON_free = (hooks->free_fn)?hooks->free_fn:free; +typedef struct internal_hooks +{ + void *(CJSON_CDECL *allocate)(size_t size); + void (CJSON_CDECL *deallocate)(void *pointer); + void *(CJSON_CDECL *reallocate)(void *pointer, size_t size); +} internal_hooks; + +#if defined(_MSC_VER) +/* work around MSVC error C2322: '...' address of dillimport '...' is not static */ +static void * CJSON_CDECL internal_malloc(size_t size) +{ + return malloc(size); +} +static void CJSON_CDECL internal_free(void *pointer) +{ + free(pointer); +} +static void * CJSON_CDECL internal_realloc(void *pointer, size_t size) +{ + return realloc(pointer, size); } +#else +#define internal_malloc malloc +#define internal_free free +#define internal_realloc realloc +#endif -/* Internal constructor. 
*/ -static cJSON *cJSON_New_Item(void) +static internal_hooks global_hooks = { internal_malloc, internal_free, internal_realloc }; + +static unsigned char* cJSON_strdup(const unsigned char* string, const internal_hooks * const hooks) { - cJSON* node = (cJSON*)cJSON_malloc(sizeof(cJSON)); - if (node) memset(node,0,sizeof(cJSON)); - return node; + size_t length = 0; + unsigned char *copy = NULL; + + if (string == NULL) + { + return NULL; + } + + length = strlen((const char*)string) + sizeof(""); + copy = (unsigned char*)hooks->allocate(length); + if (copy == NULL) + { + return NULL; + } + memcpy(copy, string, length); + + return copy; } +CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks) +{ + if (hooks == NULL) + { + /* Reset hooks */ + global_hooks.allocate = malloc; + global_hooks.deallocate = free; + global_hooks.reallocate = realloc; + return; + } + + global_hooks.allocate = malloc; + if (hooks->malloc_fn != NULL) + { + global_hooks.allocate = hooks->malloc_fn; + } + + global_hooks.deallocate = free; + if (hooks->free_fn != NULL) + { + global_hooks.deallocate = hooks->free_fn; + } + + /* use realloc only if both free and malloc are used */ + global_hooks.reallocate = NULL; + if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free)) + { + global_hooks.reallocate = realloc; + } +} + +/* Internal constructor. */ +static cJSON *cJSON_New_Item(const internal_hooks * const hooks) +{ + cJSON* node = (cJSON*)hooks->allocate(sizeof(cJSON)); + if (node) + { + memset(node, '\0', sizeof(cJSON)); + } + + return node; +} /* Delete a cJSON structure. 
*/ -void cJSON_Delete(cJSON *c) +CJSON_PUBLIC(void) cJSON_Delete(cJSON *item) { - cJSON *next; - while (c) - { - next=c->next; - if (!(c->type&cJSON_IsReference) && c->child) cJSON_Delete(c->child); - if (!(c->type&cJSON_IsReference) && c->valuestring) cJSON_free(c->valuestring); - if (!(c->type&cJSON_StringIsConst) && c->string) cJSON_free(c->string); - cJSON_free(c); - c=next; - } + cJSON *next = NULL; + while (item != NULL) + { + next = item->next; + if (!(item->type & cJSON_IsReference) && (item->child != NULL)) + { + cJSON_Delete(item->child); + } + if (!(item->type & cJSON_IsReference) && (item->valuestring != NULL)) + { + global_hooks.deallocate(item->valuestring); + } + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) + { + global_hooks.deallocate(item->string); + } + global_hooks.deallocate(item); + item = next; + } } -/* Parse the input text to generate a number, and populate the result into item. */ -static const char *parse_number(cJSON *item,const char *num) +/* get the decimal point character of the current locale */ +static unsigned char get_decimal_point(void) { - double n=0,sign=1,scale=0;int subscale=0,signsubscale=1; +#ifdef ENABLE_LOCALES + struct lconv *lconv = localeconv(); + return (unsigned char)lconv->decimal_point[0]; +#else + return '.'; +#endif +} - if (*num=='-') sign=-1,num++; /* Has sign? */ - if (*num=='0') num++; /* is zero */ - if (*num>='1' && *num<='9') do n=(n*10.0)+(*num++ -'0'); while (*num>='0' && *num<='9'); /* Number? */ - if (*num=='.' && num[1]>='0' && num[1]<='9') {num++; do n=(n*10.0)+(*num++ -'0'),scale--; while (*num>='0' && *num<='9');} /* Fractional part? */ - if (*num=='e' || *num=='E') /* Exponent? */ - { num++;if (*num=='+') num++; else if (*num=='-') signsubscale=-1,num++; /* With sign? */ - while (*num>='0' && *num<='9') subscale=(subscale*10)+(*num++ - '0'); /* Number? 
*/ - } +typedef struct +{ + const unsigned char *content; + size_t length; + size_t offset; + size_t depth; /* How deeply nested (in arrays/objects) is the input at the current offset. */ + internal_hooks hooks; +} parse_buffer; + +/* check if the given size is left to read in a given parse buffer (starting with 1) */ +#define can_read(buffer, size) ((buffer != NULL) && (((buffer)->offset + size) <= (buffer)->length)) +/* check if the buffer can be accessed at the given index (starting with 0) */ +#define can_access_at_index(buffer, index) ((buffer != NULL) && (((buffer)->offset + index) < (buffer)->length)) +#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index)) +/* get a pointer to the buffer at the position */ +#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset) - n=sign*n*pow(10.0,(scale+subscale*signsubscale)); /* number = +/- number.fraction * 10^+/- exponent */ - - item->valuedouble=n; - item->valueint=(int)n; - item->type=cJSON_Number; - return num; +/* Parse the input text to generate a number, and populate the result into item. */ +static cJSON_bool parse_number(cJSON * const item, parse_buffer * const input_buffer) +{ + double number = 0; + unsigned char *after_end = NULL; + unsigned char number_c_string[64]; + unsigned char decimal_point = get_decimal_point(); + size_t i = 0; + + if ((input_buffer == NULL) || (input_buffer->content == NULL)) + { + return false; + } + + /* copy the number into a temporary buffer and replace '.' 
with the decimal point + * of the current locale (for strtod) + * This also takes care of '\0' not necessarily being available for marking the end of the input */ + for (i = 0; (i < (sizeof(number_c_string) - 1)) && can_access_at_index(input_buffer, i); i++) + { + switch (buffer_at_offset(input_buffer)[i]) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '+': + case '-': + case 'e': + case 'E': + number_c_string[i] = buffer_at_offset(input_buffer)[i]; + break; + + case '.': + number_c_string[i] = decimal_point; + break; + + default: + goto loop_end; + } + } +loop_end: + number_c_string[i] = '\0'; + + number = strtod((const char*)number_c_string, (char**)&after_end); + if (number_c_string == after_end) + { + return false; /* parse_error */ + } + + item->valuedouble = number; + + /* use saturation in case of overflow */ + if (number >= INT_MAX) + { + item->valueint = INT_MAX; + } + else if (number <= INT_MIN) + { + item->valueint = INT_MIN; + } + else + { + item->valueint = (int)number; + } + + item->type = cJSON_Number; + + input_buffer->offset += (size_t)(after_end - number_c_string); + return true; } -static int pow2gt (int x) { --x; x|=x>>1; x|=x>>2; x|=x>>4; x|=x>>8; x|=x>>16; return x+1; } +/* don't ask me, but the original cJSON_SetNumberValue returns an integer or double */ +CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number) +{ + if (number >= INT_MAX) + { + object->valueint = INT_MAX; + } + else if (number <= INT_MIN) + { + object->valueint = INT_MIN; + } + else + { + object->valueint = (int)number; + } + + return object->valuedouble = number; +} -typedef struct {char *buffer; int length; int offset; } printbuffer; +typedef struct +{ + unsigned char *buffer; + size_t length; + size_t offset; + size_t depth; /* current nesting depth (for formatted printing) */ + cJSON_bool noalloc; + cJSON_bool format; /* is this print a formatted print */ + internal_hooks 
hooks; +} printbuffer; + +/* realloc printbuffer if necessary to have at least "needed" bytes more */ +static unsigned char* ensure(printbuffer * const p, size_t needed) +{ + unsigned char *newbuffer = NULL; + size_t newsize = 0; + + if ((p == NULL) || (p->buffer == NULL)) + { + return NULL; + } + + if ((p->length > 0) && (p->offset >= p->length)) + { + /* make sure that offset is valid */ + return NULL; + } + + if (needed > INT_MAX) + { + /* sizes bigger than INT_MAX are currently not supported */ + return NULL; + } + + needed += p->offset + 1; + if (needed <= p->length) + { + return p->buffer + p->offset; + } + + if (p->noalloc) { + return NULL; + } + + /* calculate new buffer size */ + if (needed > (INT_MAX / 2)) + { + /* overflow of int, use INT_MAX if possible */ + if (needed <= INT_MAX) + { + newsize = INT_MAX; + } + else + { + return NULL; + } + } + else + { + newsize = needed * 2; + } + + if (p->hooks.reallocate != NULL) + { + /* reallocate with realloc if available */ + newbuffer = (unsigned char*)p->hooks.reallocate(p->buffer, newsize); + if (newbuffer == NULL) + { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + } + else + { + /* otherwise reallocate manually */ + newbuffer = (unsigned char*)p->hooks.allocate(newsize); + if (!newbuffer) + { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + if (newbuffer) + { + memcpy(newbuffer, p->buffer, p->offset + 1); + } + p->hooks.deallocate(p->buffer); + } + p->length = newsize; + p->buffer = newbuffer; + + return newbuffer + p->offset; +} -static char* ensure(printbuffer *p,int needed) +/* calculate the new length of the string in a printbuffer and update the offset */ +static void update_offset(printbuffer * const buffer) { - char *newbuffer;int newsize; - if (!p || !p->buffer) return 0; - needed+=p->offset; - if (needed<=p->length) return p->buffer+p->offset; + const unsigned char *buffer_pointer = NULL; + if ((buffer == 
NULL) || (buffer->buffer == NULL)) + { + return; + } + buffer_pointer = buffer->buffer + buffer->offset; + + buffer->offset += strlen((const char*)buffer_pointer); +} - newsize=pow2gt(needed); - newbuffer=(char*)cJSON_malloc(newsize); - if (!newbuffer) {cJSON_free(p->buffer);p->length=0,p->buffer=0;return 0;} - if (newbuffer) memcpy(newbuffer,p->buffer,p->length); - cJSON_free(p->buffer); - p->length=newsize; - p->buffer=newbuffer; - return newbuffer+p->offset; +/* Render the number nicely from the given item into a string. */ +static cJSON_bool print_number(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output_pointer = NULL; + double d = item->valuedouble; + int length = 0; + size_t i = 0; + unsigned char number_buffer[26]; /* temporary buffer to print the number into */ + unsigned char decimal_point = get_decimal_point(); + double test; + + if (output_buffer == NULL) + { + return false; + } + + /* This checks for NaN and Infinity */ + if ((d * 0) != 0) + { + length = sprintf((char*)number_buffer, "null"); + } + else + { + /* Try 15 decimal places of precision to avoid nonsignificant nonzero digits */ + length = sprintf((char*)number_buffer, "%1.15g", d); + + /* Check whether the original double can be recovered */ + if ((sscanf((char*)number_buffer, "%lg", &test) != 1) || ((double)test != d)) + { + /* If not, print with 17 decimal places of precision */ + length = sprintf((char*)number_buffer, "%1.17g", d); + } + } + + /* sprintf failed or buffer overrun occured */ + if ((length < 0) || (length >(int)(sizeof(number_buffer) - 1))) + { + return false; + } + + /* reserve appropriate space in the output */ + output_pointer = ensure(output_buffer, (size_t)length + sizeof("")); + if (output_pointer == NULL) + { + return false; + } + + /* copy the printed number to the output and replace locale + * dependent decimal point with '.' 
*/ + for (i = 0; i < ((size_t)length); i++) + { + if (number_buffer[i] == decimal_point) + { + output_pointer[i] = '.'; + continue; + } + + output_pointer[i] = number_buffer[i]; + } + output_pointer[i] = '\0'; + + output_buffer->offset += (size_t)length; + + return true; } -static int update(printbuffer *p) +/* parse 4 digit hexadecimal number */ +static unsigned parse_hex4(const unsigned char * const input) { - char *str; - if (!p || !p->buffer) return 0; - str=p->buffer+p->offset; - return p->offset+strlen(str); + unsigned int h = 0; + size_t i = 0; + + for (i = 0; i < 4; i++) + { + /* parse digit */ + if ((input[i] >= '0') && (input[i] <= '9')) + { + h += (unsigned int)input[i] - '0'; + } + else if ((input[i] >= 'A') && (input[i] <= 'F')) + { + h += (unsigned int)10 + input[i] - 'A'; + } + else if ((input[i] >= 'a') && (input[i] <= 'f')) + { + h += (unsigned int)10 + input[i] - 'a'; + } + else /* invalid */ + { + return 0; + } + + if (i < 3) + { + /* shift left to make place for the next nibble */ + h = h << 4; + } + } + + return h; } -/* Render the number nicely from the given item into a string. */ -static char *print_number(cJSON *item,printbuffer *p) -{ - char *str=0; - double d=item->valuedouble; - if (d==0) - { - if (p) str=ensure(p,2); - else str=(char*)cJSON_malloc(2); /* special case for 0. */ - if (str) strcpy(str,"0"); - } - else if (fabs(((double)item->valueint)-d)<=DBL_EPSILON && d<=INT_MAX && d>=INT_MIN) - { - if (p) str=ensure(p,21); - else str=(char*)cJSON_malloc(21); /* 2^64+1 can be represented in 21 chars. */ - if (str) sprintf(str,"%d",item->valueint); - } - else - { - if (p) str=ensure(p,64); - else str=(char*)cJSON_malloc(64); /* This is a nice tradeoff. 
*/ - if (str) - { - if (fabs(floor(d)-d)<=DBL_EPSILON && fabs(d)<1.0e60)sprintf(str,"%.0f",d); - else if (fabs(d)<1.0e-6 || fabs(d)>1.0e9) sprintf(str,"%e",d); - else sprintf(str,"%f",d); - } - } - return str; -} - -static unsigned parse_hex4(const char *str) -{ - unsigned h=0; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - h=h<<4;str++; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - h=h<<4;str++; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - h=h<<4;str++; - if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; - return h; -} - -/* Parse the input text into an unescaped cstring, and populate item. */ -static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; -static const char *parse_string(cJSON *item,const char *str) -{ - const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; - if (*str!='\"') {ep=str;return 0;} /* not a string! */ - - while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ - - out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */ - if (!out) return 0; - - ptr=str+1;ptr2=out; - while (*ptr!='\"' && *ptr) - { - if (*ptr!='\\') *ptr2++=*ptr++; - else - { - ptr++; - switch (*ptr) - { - case 'b': *ptr2++='\b'; break; - case 'f': *ptr2++='\f'; break; - case 'n': *ptr2++='\n'; break; - case 'r': *ptr2++='\r'; break; - case 't': *ptr2++='\t'; break; - case 'u': /* transcode utf16 to utf8. */ - uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. 
*/ - - if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ - - if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ - { - if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ - uc2=parse_hex4(ptr+3);ptr+=6; - if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ - uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); - } - - len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len; - - switch (len) { - case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; - case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; - case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; - case 1: *--ptr2 =(uc | firstByteMark[len]); - } - ptr2+=len; - break; - default: *ptr2++=*ptr; break; - } - ptr++; - } - } - *ptr2=0; - if (*ptr=='\"') ptr++; - item->valuestring=out; - item->type=cJSON_String; - return ptr; +/* converts a UTF-16 literal to UTF-8 +* A literal can be one or two sequences of the form \uXXXX */ +static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer) +{ + long unsigned int codepoint = 0; + unsigned int first_code = 0; + const unsigned char *first_sequence = input_pointer; + unsigned char utf8_length = 0; + unsigned char utf8_position = 0; + unsigned char sequence_length = 0; + unsigned char first_byte_mark = 0; + + if ((input_end - first_sequence) < 6) + { + /* input ends unexpectedly */ + goto fail; + } + + /* get the first utf16 sequence */ + first_code = parse_hex4(first_sequence + 2); + + /* check that the code is valid */ + if (((first_code >= 0xDC00) && (first_code <= 0xDFFF))) + { + goto fail; + } + + /* UTF16 surrogate pair */ + if ((first_code >= 0xD800) && (first_code <= 0xDBFF)) + { + const unsigned char *second_sequence = first_sequence + 6; + unsigned int second_code = 0; + sequence_length = 12; /* \uXXXX\uXXXX */ + + if ((input_end - second_sequence) < 6) + { + /* input ends 
unexpectedly */ + goto fail; + } + + if ((second_sequence[0] != '\\') || (second_sequence[1] != 'u')) + { + /* missing second half of the surrogate pair */ + goto fail; + } + + /* get the second utf16 sequence */ + second_code = parse_hex4(second_sequence + 2); + /* check that the code is valid */ + if ((second_code < 0xDC00) || (second_code > 0xDFFF)) + { + /* invalid second half of the surrogate pair */ + goto fail; + } + + + /* calculate the unicode codepoint from the surrogate pair */ + codepoint = 0x10000 + (((first_code & 0x3FF) << 10) | (second_code & 0x3FF)); + } + else + { + sequence_length = 6; /* \uXXXX */ + codepoint = first_code; + } + + /* encode as UTF-8 + * takes at maximum 4 bytes to encode: + * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + if (codepoint < 0x80) + { + /* normal ascii, encoding 0xxxxxxx */ + utf8_length = 1; + } + else if (codepoint < 0x800) + { + /* two bytes, encoding 110xxxxx 10xxxxxx */ + utf8_length = 2; + first_byte_mark = 0xC0; /* 11000000 */ + } + else if (codepoint < 0x10000) + { + /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */ + utf8_length = 3; + first_byte_mark = 0xE0; /* 11100000 */ + } + else if (codepoint <= 0x10FFFF) + { + /* four bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8_length = 4; + first_byte_mark = 0xF0; /* 11110000 */ + } + else + { + /* invalid unicode codepoint */ + goto fail; + } + + /* encode as utf8 */ + for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--) + { + /* 10xxxxxx */ + (*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF); + codepoint >>= 6; + } + /* encode first byte */ + if (utf8_length > 1) + { + (*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF); + } + else + { + (*output_pointer)[0] = (unsigned char)(codepoint & 0x7F); + } + + *output_pointer += utf8_length; + + return sequence_length; + +fail: + return 0; +} + +/* Parse the input text into an unescaped cinput, and populate item. 
*/ +static cJSON_bool parse_string(cJSON * const item, parse_buffer * const input_buffer) +{ + const unsigned char *input_pointer = buffer_at_offset(input_buffer) + 1; + const unsigned char *input_end = buffer_at_offset(input_buffer) + 1; + unsigned char *output_pointer = NULL; + unsigned char *output = NULL; + + /* not a string */ + if (buffer_at_offset(input_buffer)[0] != '\"') + { + goto fail; + } + + { + /* calculate approximate size of the output (overestimate) */ + size_t allocation_length = 0; + size_t skipped_bytes = 0; + while (((size_t)(input_end - input_buffer->content) < input_buffer->length) && (*input_end != '\"')) + { + /* is escape sequence */ + if (input_end[0] == '\\') + { + if ((size_t)(input_end + 1 - input_buffer->content) >= input_buffer->length) + { + /* prevent buffer overflow when last input character is a backslash */ + goto fail; + } + skipped_bytes++; + input_end++; + } + input_end++; + } + if (((size_t)(input_end - input_buffer->content) >= input_buffer->length) || (*input_end != '\"')) + { + goto fail; /* string ended unexpectedly */ + } + + /* This is at most how much we need for the output */ + allocation_length = (size_t)(input_end - buffer_at_offset(input_buffer)) - skipped_bytes; + output = (unsigned char*)input_buffer->hooks.allocate(allocation_length + sizeof("")); + if (output == NULL) + { + goto fail; /* allocation failure */ + } + } + + output_pointer = output; + /* loop through the string literal */ + while (input_pointer < input_end) + { + if (*input_pointer != '\\') + { + *output_pointer++ = *input_pointer++; + } + /* escape sequence */ + else + { + unsigned char sequence_length = 2; + if ((input_end - input_pointer) < 1) + { + goto fail; + } + + switch (input_pointer[1]) + { + case 'b': + *output_pointer++ = '\b'; + break; + case 'f': + *output_pointer++ = '\f'; + break; + case 'n': + *output_pointer++ = '\n'; + break; + case 'r': + *output_pointer++ = '\r'; + break; + case 't': + *output_pointer++ = '\t'; + break; + case 
'\"': + case '\\': + case '/': + *output_pointer++ = input_pointer[1]; + break; + + /* UTF-16 literal */ + case 'u': + sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer); + if (sequence_length == 0) + { + /* failed to convert UTF16-literal to UTF-8 */ + goto fail; + } + break; + + default: + goto fail; + } + input_pointer += sequence_length; + } + } + + /* zero terminate the output */ + *output_pointer = '\0'; + + item->type = cJSON_String; + item->valuestring = (char*)output; + + input_buffer->offset = (size_t)(input_end - input_buffer->content); + input_buffer->offset++; + + return true; + +fail: + if (output != NULL) + { + input_buffer->hooks.deallocate(output); + } + + if (input_pointer != NULL) + { + input_buffer->offset = (size_t)(input_pointer - input_buffer->content); + } + + return false; } /* Render the cstring provided to an escaped version that can be printed. */ -static char *print_string_ptr(const char *str,printbuffer *p) -{ - const char *ptr;char *ptr2,*out;int len=0,flag=0;unsigned char token; - - for (ptr=str;*ptr;ptr++) flag|=((*ptr>0 && *ptr<32)||(*ptr=='\"')||(*ptr=='\\'))?1:0; - if (!flag) - { - len=ptr-str; - if (p) out=ensure(p,len+3); - else out=(char*)cJSON_malloc(len+3); - if (!out) return 0; - ptr2=out;*ptr2++='\"'; - strcpy(ptr2,str); - ptr2[len]='\"'; - ptr2[len+1]=0; - return out; - } - - if (!str) - { - if (p) out=ensure(p,3); - else out=(char*)cJSON_malloc(3); - if (!out) return 0; - strcpy(out,"\"\""); - return out; - } - ptr=str;while ((token=*ptr) && ++len) {if (strchr("\"\\\b\f\n\r\t",token)) len++; else if (token<32) len+=5;ptr++;} - - if (p) out=ensure(p,len+3); - else out=(char*)cJSON_malloc(len+3); - if (!out) return 0; - - ptr2=out;ptr=str; - *ptr2++='\"'; - while (*ptr) - { - if ((unsigned char)*ptr>31 && *ptr!='\"' && *ptr!='\\') *ptr2++=*ptr++; - else - { - *ptr2++='\\'; - switch (token=*ptr++) - { - case '\\': *ptr2++='\\'; break; - case '\"': *ptr2++='\"'; break; - case '\b': 
*ptr2++='b'; break; - case '\f': *ptr2++='f'; break; - case '\n': *ptr2++='n'; break; - case '\r': *ptr2++='r'; break; - case '\t': *ptr2++='t'; break; - default: sprintf(ptr2,"u%04x",token);ptr2+=5; break; /* escape and print */ - } - } - } - *ptr2++='\"';*ptr2++=0; - return out; -} -/* Invote print_string_ptr (which is useful) on an item. */ -static char *print_string(cJSON *item,printbuffer *p) {return print_string_ptr(item->valuestring,p);} +static cJSON_bool print_string_ptr(const unsigned char * const input, printbuffer * const output_buffer) +{ + const unsigned char *input_pointer = NULL; + unsigned char *output = NULL; + unsigned char *output_pointer = NULL; + size_t output_length = 0; + /* numbers of additional characters needed for escaping */ + size_t escape_characters = 0; + + if (output_buffer == NULL) + { + return false; + } + + /* empty string */ + if (input == NULL) + { + output = ensure(output_buffer, sizeof("\"\"")); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "\"\""); + + return true; + } + + /* set "flag" to 1 if something needs to be escaped */ + for (input_pointer = input; *input_pointer; input_pointer++) + { + switch (*input_pointer) + { + case '\"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + /* one character escape sequence */ + escape_characters++; + break; + default: + if (*input_pointer < 32) + { + /* UTF-16 escape sequence uXXXX */ + escape_characters += 5; + } + break; + } + } + output_length = (size_t)(input_pointer - input) + escape_characters; + + output = ensure(output_buffer, output_length + sizeof("\"\"")); + if (output == NULL) + { + return false; + } + + /* no characters have to be escaped */ + if (escape_characters == 0) + { + output[0] = '\"'; + memcpy(output + 1, input, output_length); + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; + } + + output[0] = '\"'; + output_pointer = output + 1; + /* copy the string */ + for 
(input_pointer = input; *input_pointer != '\0'; (void)input_pointer++, output_pointer++) + { + if ((*input_pointer > 31) && (*input_pointer != '\"') && (*input_pointer != '\\')) + { + /* normal character, copy */ + *output_pointer = *input_pointer; + } + else + { + /* character needs to be escaped */ + *output_pointer++ = '\\'; + switch (*input_pointer) + { + case '\\': + *output_pointer = '\\'; + break; + case '\"': + *output_pointer = '\"'; + break; + case '\b': + *output_pointer = 'b'; + break; + case '\f': + *output_pointer = 'f'; + break; + case '\n': + *output_pointer = 'n'; + break; + case '\r': + *output_pointer = 'r'; + break; + case '\t': + *output_pointer = 't'; + break; + default: + /* escape and print as unicode codepoint */ + sprintf((char*)output_pointer, "u%04x", *input_pointer); + output_pointer += 4; + break; + } + } + } + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; +} + +/* Invoke print_string_ptr (which is useful) on an item. */ +static cJSON_bool print_string(const cJSON * const item, printbuffer * const p) +{ + return print_string_ptr((unsigned char*)item->valuestring, p); +} /* Predeclare these prototypes. 
*/ -static const char *parse_value(cJSON *item,const char *value); -static char *print_value(cJSON *item,int depth,int fmt,printbuffer *p); -static const char *parse_array(cJSON *item,const char *value); -static char *print_array(cJSON *item,int depth,int fmt,printbuffer *p); -static const char *parse_object(cJSON *item,const char *value); -static char *print_object(cJSON *item,int depth,int fmt,printbuffer *p); +static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_value(const cJSON * const item, printbuffer * const output_buffer); +static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_array(const cJSON * const item, printbuffer * const output_buffer); +static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer); +static cJSON_bool print_object(const cJSON * const item, printbuffer * const output_buffer); /* Utility to jump whitespace and cr/lf */ -static const char *skip(const char *in) {while (in && *in && (unsigned char)*in<=32) in++; return in;} +static parse_buffer *buffer_skip_whitespace(parse_buffer * const buffer) +{ + if ((buffer == NULL) || (buffer->content == NULL)) + { + return NULL; + } + + while (can_access_at_index(buffer, 0) && (buffer_at_offset(buffer)[0] <= 32)) + { + buffer->offset++; + } + + if (buffer->offset == buffer->length) + { + buffer->offset--; + } + + return buffer; +} -/* Parse an object - create a new root, and populate. 
*/ -cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated) +/* skip the UTF-8 BOM (byte order mark) if it is at the beginning of a buffer */ +static parse_buffer *skip_utf8_bom(parse_buffer * const buffer) { - const char *end=0; - cJSON *c=cJSON_New_Item(); - ep=0; - if (!c) return 0; /* memory fail */ + if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0)) + { + return NULL; + } - end=parse_value(c,skip(value)); - if (!end) {cJSON_Delete(c);return 0;} /* parse failure. ep is set. */ + if (can_access_at_index(buffer, 4) && (strncmp((const char*)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0)) + { + buffer->offset += 3; + } - /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ - if (require_null_terminated) {end=skip(end);if (*end) {cJSON_Delete(c);ep=end;return 0;}} - if (return_parse_end) *return_parse_end=end; - return c; + return buffer; } -/* Default options for cJSON_Parse */ -cJSON *cJSON_Parse(const char *value) {return cJSON_ParseWithOpts(value,0,0);} -/* Render a cJSON item/entity/structure to text. */ -char *cJSON_Print(cJSON *item) {return print_value(item,0,1,0);} -char *cJSON_PrintUnformatted(cJSON *item) {return print_value(item,0,0,0);} +/* Parse an object - create a new root, and populate. 
*/ +CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated) +{ + parse_buffer buffer = { 0, 0, 0, 0,{ 0, 0, 0 } }; + cJSON *item = NULL; + + /* reset error position */ + global_error.json = NULL; + global_error.position = 0; + + if (value == NULL) + { + goto fail; + } + + buffer.content = (const unsigned char*)value; + buffer.length = strlen((const char*)value) + sizeof(""); + buffer.offset = 0; + buffer.hooks = global_hooks; + + item = cJSON_New_Item(&global_hooks); + if (item == NULL) /* memory fail */ + { + goto fail; + } + + if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer)))) + { + /* parse failure. ep is set. */ + goto fail; + } + + /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ + if (require_null_terminated) + { + buffer_skip_whitespace(&buffer); + if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0') + { + goto fail; + } + } + if (return_parse_end) + { + *return_parse_end = (const char*)buffer_at_offset(&buffer); + } + + return item; + +fail: + if (item != NULL) + { + cJSON_Delete(item); + } + + if (value != NULL) + { + error local_error; + local_error.json = (const unsigned char*)value; + local_error.position = 0; + + if (buffer.offset < buffer.length) + { + local_error.position = buffer.offset; + } + else if (buffer.length > 0) + { + local_error.position = buffer.length - 1; + } + + if (return_parse_end != NULL) + { + *return_parse_end = (const char*)local_error.json + local_error.position; + } + + global_error = local_error; + } + + return NULL; +} -char *cJSON_PrintBuffered(cJSON *item,int prebuffer,int fmt) +/* Default options for cJSON_Parse */ +CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value) { - printbuffer p; - p.buffer=(char*)cJSON_malloc(prebuffer); - p.length=prebuffer; - p.offset=0; - return print_value(item,0,fmt,&p); - return p.buffer; + return 
cJSON_ParseWithOpts(value, 0, 0); } +#define cjson_min(a, b) ((a < b) ? a : b) -/* Parser core - when encountering text, process appropriately. */ -static const char *parse_value(cJSON *item,const char *value) +static unsigned char *print(const cJSON * const item, cJSON_bool format, const internal_hooks * const hooks) { - if (!value) return 0; /* Fail on null. */ - if (!strncmp(value,"null",4)) { item->type=cJSON_NULL; return value+4; } - if (!strncmp(value,"false",5)) { item->type=cJSON_False; return value+5; } - if (!strncmp(value,"true",4)) { item->type=cJSON_True; item->valueint=1; return value+4; } - if (*value=='\"') { return parse_string(item,value); } - if (*value=='-' || (*value>='0' && *value<='9')) { return parse_number(item,value); } - if (*value=='[') { return parse_array(item,value); } - if (*value=='{') { return parse_object(item,value); } + static const size_t default_buffer_size = 256; + printbuffer buffer[1]; + unsigned char *printed = NULL; + + memset(buffer, 0, sizeof(buffer)); + + /* create buffer */ + buffer->buffer = (unsigned char*)hooks->allocate(default_buffer_size); + buffer->length = default_buffer_size; + buffer->format = format; + buffer->hooks = *hooks; + if (buffer->buffer == NULL) + { + goto fail; + } + + /* print the value */ + if (!print_value(item, buffer)) + { + goto fail; + } + update_offset(buffer); + + /* check if reallocate is available */ + if (hooks->reallocate != NULL) + { + printed = (unsigned char*)hooks->reallocate(buffer->buffer, buffer->offset + 1); + if (printed == NULL) { + goto fail; + } + buffer->buffer = NULL; + } + else /* otherwise copy the JSON over to a new buffer */ + { + printed = (unsigned char*)hooks->allocate(buffer->offset + 1); + if (printed == NULL) + { + goto fail; + } + memcpy(printed, buffer->buffer, cjson_min(buffer->length, buffer->offset + 1)); + printed[buffer->offset] = '\0'; /* just to be sure */ + + /* free the buffer */ + hooks->deallocate(buffer->buffer); + } + + return printed; + +fail: 
+ if (buffer->buffer != NULL) + { + hooks->deallocate(buffer->buffer); + } + + if (printed != NULL) + { + hooks->deallocate(printed); + } + + return NULL; +} - ep=value;return 0; /* failure. */ +/* Render a cJSON item/entity/structure to text. */ +CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item) +{ + return (char*)print(item, true, &global_hooks); } -/* Render a value to text. */ -static char *print_value(cJSON *item,int depth,int fmt,printbuffer *p) -{ - char *out=0; - if (!item) return 0; - if (p) - { - switch ((item->type)&255) - { - case cJSON_NULL: {out=ensure(p,5); if (out) strcpy(out,"null"); break;} - case cJSON_False: {out=ensure(p,6); if (out) strcpy(out,"false"); break;} - case cJSON_True: {out=ensure(p,5); if (out) strcpy(out,"true"); break;} - case cJSON_Number: out=print_number(item,p);break; - case cJSON_String: out=print_string(item,p);break; - case cJSON_Array: out=print_array(item,depth,fmt,p);break; - case cJSON_Object: out=print_object(item,depth,fmt,p);break; - } - } - else - { - switch ((item->type)&255) - { - case cJSON_NULL: out=cJSON_strdup("null"); break; - case cJSON_False: out=cJSON_strdup("false");break; - case cJSON_True: out=cJSON_strdup("true"); break; - case cJSON_Number: out=print_number(item,0);break; - case cJSON_String: out=print_string(item,0);break; - case cJSON_Array: out=print_array(item,depth,fmt,0);break; - case cJSON_Object: out=print_object(item,depth,fmt,0);break; - } - } - return out; +CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item) +{ + return (char*)print(item, false, &global_hooks); } -/* Build an array from input text. */ -static const char *parse_array(cJSON *item,const char *value) +CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt) { - cJSON *child; - if (*value!='[') {ep=value;return 0;} /* not an array! 
*/ + printbuffer p = { 0, 0, 0, 0, 0, 0,{ 0, 0, 0 } }; + + if (prebuffer < 0) + { + return NULL; + } + + p.buffer = (unsigned char*)global_hooks.allocate((size_t)prebuffer); + if (!p.buffer) + { + return NULL; + } + + p.length = (size_t)prebuffer; + p.offset = 0; + p.noalloc = false; + p.format = fmt; + p.hooks = global_hooks; + + if (!print_value(item, &p)) + { + global_hooks.deallocate(p.buffer); + return NULL; + } + + return (char*)p.buffer; +} - item->type=cJSON_Array; - value=skip(value+1); - if (*value==']') return value+1; /* empty array. */ +CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buf, const int len, const cJSON_bool fmt) +{ + printbuffer p = { 0, 0, 0, 0, 0, 0,{ 0, 0, 0 } }; - item->child=child=cJSON_New_Item(); - if (!item->child) return 0; /* memory fail */ - value=skip(parse_value(child,skip(value))); /* skip any spacing, get the value. */ - if (!value) return 0; + if ((len < 0) || (buf == NULL)) + { + return false; + } - while (*value==',') - { - cJSON *new_item; - if (!(new_item=cJSON_New_Item())) return 0; /* memory fail */ - child->next=new_item;new_item->prev=child;child=new_item; - value=skip(parse_value(child,skip(value+1))); - if (!value) return 0; /* memory fail */ - } + p.buffer = (unsigned char*)buf; + p.length = (size_t)len; + p.offset = 0; + p.noalloc = true; + p.format = fmt; + p.hooks = global_hooks; + + return print_value(item, &p); +} - if (*value==']') return value+1; /* end of array */ - ep=value;return 0; /* malformed. */ +/* Parser core - when encountering text, process appropriately. 
*/ +static cJSON_bool parse_value(cJSON * const item, parse_buffer * const input_buffer) +{ + if ((input_buffer == NULL) || (input_buffer->content == NULL)) + { + return false; /* no input */ + } + + /* parse the different types of values */ + /* null */ + if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "null", 4) == 0)) + { + item->type = cJSON_NULL; + input_buffer->offset += 4; + return true; + } + /* false */ + if (can_read(input_buffer, 5) && (strncmp((const char*)buffer_at_offset(input_buffer), "false", 5) == 0)) + { + item->type = cJSON_False; + input_buffer->offset += 5; + return true; + } + /* true */ + if (can_read(input_buffer, 4) && (strncmp((const char*)buffer_at_offset(input_buffer), "true", 4) == 0)) + { + item->type = cJSON_True; + item->valueint = 1; + input_buffer->offset += 4; + return true; + } + /* string */ + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '\"')) + { + return parse_string(item, input_buffer); + } + /* number */ + if (can_access_at_index(input_buffer, 0) && ((buffer_at_offset(input_buffer)[0] == '-') || ((buffer_at_offset(input_buffer)[0] >= '0') && (buffer_at_offset(input_buffer)[0] <= '9')))) + { + return parse_number(item, input_buffer); + } + /* array */ + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '[')) + { + return parse_array(item, input_buffer); + } + /* object */ + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '{')) + { + return parse_object(item, input_buffer); + } + + return false; +} + +/* Render a value to text. 
*/ +static cJSON_bool print_value(const cJSON * const item, printbuffer * const output_buffer) +{ + unsigned char *output = NULL; + + if ((item == NULL) || (output_buffer == NULL)) + { + return false; + } + + switch ((item->type) & 0xFF) + { + case cJSON_NULL: + output = ensure(output_buffer, 5); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "null"); + return true; + + case cJSON_False: + output = ensure(output_buffer, 6); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "false"); + return true; + + case cJSON_True: + output = ensure(output_buffer, 5); + if (output == NULL) + { + return false; + } + strcpy((char*)output, "true"); + return true; + + case cJSON_Number: + return print_number(item, output_buffer); + + case cJSON_Raw: + { + size_t raw_length = 0; + if (item->valuestring == NULL) + { + return false; + } + + raw_length = strlen(item->valuestring) + sizeof(""); + output = ensure(output_buffer, raw_length); + if (output == NULL) + { + return false; + } + memcpy(output, item->valuestring, raw_length); + return true; + } + + case cJSON_String: + return print_string(item, output_buffer); + + case cJSON_Array: + return print_array(item, output_buffer); + + case cJSON_Object: + return print_object(item, output_buffer); + + default: + return false; + } +} + +/* Build an array from input text. 
*/ +static cJSON_bool parse_array(cJSON * const item, parse_buffer * const input_buffer) +{ + cJSON *head = NULL; /* head of the linked list */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) + { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (buffer_at_offset(input_buffer)[0] != '[') + { + /* not an array */ + goto fail; + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ']')) + { + /* empty array */ + goto success; + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) + { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do + { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) + { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) + { + /* start the linked list */ + current_item = head = new_item; + } + else + { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse next value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) + { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || buffer_at_offset(input_buffer)[0] != ']') + { + goto fail; /* expected end of array */ + } + +success: + input_buffer->depth--; + + item->type = cJSON_Array; + item->child = head; + + input_buffer->offset++; + + return true; + +fail: + if (head != NULL) + { + cJSON_Delete(head); + } + + return 
false; } /* Render an array to text */ -static char *print_array(cJSON *item,int depth,int fmt,printbuffer *p) -{ - char **entries; - char *out=0,*ptr,*ret;int len=5; - cJSON *child=item->child; - int numentries=0,i=0,fail=0; - size_t tmplen=0; - - /* How many entries in the array? */ - while (child) numentries++,child=child->next; - /* Explicitly handle numentries==0 */ - if (!numentries) - { - if (p) out=ensure(p,3); - else out=(char*)cJSON_malloc(3); - if (out) strcpy(out,"[]"); - return out; - } - - if (p) - { - /* Compose the output array. */ - i=p->offset; - ptr=ensure(p,1);if (!ptr) return 0; *ptr='['; p->offset++; - child=item->child; - while (child && !fail) - { - print_value(child,depth+1,fmt,p); - p->offset=update(p); - if (child->next) {len=fmt?2:1;ptr=ensure(p,len+1);if (!ptr) return 0;*ptr++=',';if(fmt)*ptr++=' ';*ptr=0;p->offset+=len;} - child=child->next; - } - ptr=ensure(p,2);if (!ptr) return 0; *ptr++=']';*ptr=0; - out=(p->buffer)+i; - } - else - { - /* Allocate an array to hold the values for each */ - entries=(char**)cJSON_malloc(numentries*sizeof(char*)); - if (!entries) return 0; - memset(entries,0,numentries*sizeof(char*)); - /* Retrieve all the results: */ - child=item->child; - while (child && !fail) - { - ret=print_value(child,depth+1,fmt,0); - entries[i++]=ret; - if (ret) len+=strlen(ret)+2+(fmt?1:0); else fail=1; - child=child->next; - } - - /* If we didn't fail, try to malloc the output string */ - if (!fail) out=(char*)cJSON_malloc(len); - /* If that fails, we fail. */ - if (!out) fail=1; - - /* Handle failure. */ - if (fail) - { - for (i=0;ichild; + + if (output_buffer == NULL) + { + return false; + } + + /* Compose the output array. 
*/ + /* opening square bracket */ + output_pointer = ensure(output_buffer, 1); + if (output_pointer == NULL) + { + return false; + } + + *output_pointer = '['; + output_buffer->offset++; + output_buffer->depth++; + + while (current_element != NULL) + { + if (!print_value(current_element, output_buffer)) + { + return false; + } + update_offset(output_buffer); + if (current_element->next) + { + length = (size_t)(output_buffer->format ? 2 : 1); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ','; + if (output_buffer->format) + { + *output_pointer++ = ' '; + } + *output_pointer = '\0'; + output_buffer->offset += length; + } + current_element = current_element->next; + } + + output_pointer = ensure(output_buffer, 2); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ']'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; } /* Build an object from the text. */ -static const char *parse_object(cJSON *item,const char *value) -{ - cJSON *child; - if (*value!='{') {ep=value;return 0;} /* not an object! */ - - item->type=cJSON_Object; - value=skip(value+1); - if (*value=='}') return value+1; /* empty array. */ - - item->child=child=cJSON_New_Item(); - if (!item->child) return 0; - value=skip(parse_string(child,skip(value))); - if (!value) return 0; - child->string=child->valuestring;child->valuestring=0; - if (*value!=':') {ep=value;return 0;} /* fail! */ - value=skip(parse_value(child,skip(value+1))); /* skip any spacing, get the value. */ - if (!value) return 0; - - while (*value==',') - { - cJSON *new_item; - if (!(new_item=cJSON_New_Item())) return 0; /* memory fail */ - child->next=new_item;new_item->prev=child;child=new_item; - value=skip(parse_string(child,skip(value+1))); - if (!value) return 0; - child->string=child->valuestring;child->valuestring=0; - if (*value!=':') {ep=value;return 0;} /* fail! 
*/ - value=skip(parse_value(child,skip(value+1))); /* skip any spacing, get the value. */ - if (!value) return 0; - } - - if (*value=='}') return value+1; /* end of array */ - ep=value;return 0; /* malformed. */ +static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_buffer) +{ + cJSON *head = NULL; /* linked list head */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) + { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '{')) + { + goto fail; /* not an object */ + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '}')) + { + goto success; /* empty object */ + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) + { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do + { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) + { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) + { + /* start the linked list */ + current_item = head = new_item; + } + else + { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse the name of the child */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_string(current_item, input_buffer)) + { + goto fail; /* faile to parse name */ + } + buffer_skip_whitespace(input_buffer); + + /* swap valuestring and string, because we parsed the name */ + current_item->string = current_item->valuestring; + current_item->valuestring = NULL; + + if 
(cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ':')) + { + goto fail; /* invalid object */ + } + + /* parse the value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) + { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '}')) + { + goto fail; /* expected end of object */ + } + +success: + input_buffer->depth--; + + item->type = cJSON_Object; + item->child = head; + + input_buffer->offset++; + return true; + +fail: + if (head != NULL) + { + cJSON_Delete(head); + } + + return false; } /* Render an object to text. */ -static char *print_object(cJSON *item,int depth,int fmt,printbuffer *p) -{ - char **entries=0,**names=0; - char *out=0,*ptr,*ret,*str;int len=7,i=0,j; - cJSON *child=item->child; - int numentries=0,fail=0; - size_t tmplen=0; - /* Count the number of entries. 
*/ - while (child) numentries++,child=child->next; - /* Explicitly handle empty object case */ - if (!numentries) - { - if (p) out=ensure(p,fmt?depth+4:3); - else out=(char*)cJSON_malloc(fmt?depth+4:3); - if (!out) return 0; - ptr=out;*ptr++='{'; - if (fmt) {*ptr++='\n';for (i=0;ioffset; - len=fmt?2:1; ptr=ensure(p,len+1); if (!ptr) return 0; - *ptr++='{'; if (fmt) *ptr++='\n'; *ptr=0; p->offset+=len; - child=item->child;depth++; - while (child) - { - if (fmt) - { - ptr=ensure(p,depth); if (!ptr) return 0; - for (j=0;joffset+=depth; - } - print_string_ptr(child->string,p); - p->offset=update(p); - - len=fmt?2:1; - ptr=ensure(p,len); if (!ptr) return 0; - *ptr++=':';if (fmt) *ptr++='\t'; - p->offset+=len; - - print_value(child,depth,fmt,p); - p->offset=update(p); - - len=(fmt?1:0)+(child->next?1:0); - ptr=ensure(p,len+1); if (!ptr) return 0; - if (child->next) *ptr++=','; - if (fmt) *ptr++='\n';*ptr=0; - p->offset+=len; - child=child->next; - } - ptr=ensure(p,fmt?(depth+1):2); if (!ptr) return 0; - if (fmt) for (i=0;ibuffer)+i; - } - else - { - /* Allocate space for the names and the objects */ - entries=(char**)cJSON_malloc(numentries*sizeof(char*)); - if (!entries) return 0; - names=(char**)cJSON_malloc(numentries*sizeof(char*)); - if (!names) {cJSON_free(entries);return 0;} - memset(entries,0,sizeof(char*)*numentries); - memset(names,0,sizeof(char*)*numentries); - - /* Collect all the results into our arrays: */ - child=item->child;depth++;if (fmt) len+=depth; - while (child) - { - names[i]=str=print_string_ptr(child->string,0); - entries[i++]=ret=print_value(child,depth,fmt,0); - if (str && ret) len+=strlen(ret)+strlen(str)+2+(fmt?2+depth:0); else fail=1; - child=child->next; - } - - /* Try to allocate the output string */ - if (!fail) out=(char*)cJSON_malloc(len); - if (!out) fail=1; - - /* Handle failure */ - if (fail) - { - for (i=0;ichild; + + if (output_buffer == NULL) + { + return false; + } + + /* Compose the output: */ + length = 
(size_t)(output_buffer->format ? 2 : 1); /* fmt: {\n */ + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + + *output_pointer++ = '{'; + output_buffer->depth++; + if (output_buffer->format) + { + *output_pointer++ = '\n'; + } + output_buffer->offset += length; + + while (current_item) + { + if (output_buffer->format) + { + size_t i; + output_pointer = ensure(output_buffer, output_buffer->depth); + if (output_pointer == NULL) + { + return false; + } + for (i = 0; i < output_buffer->depth; i++) + { + *output_pointer++ = '\t'; + } + output_buffer->offset += output_buffer->depth; + } + + /* print key */ + if (!print_string_ptr((unsigned char*)current_item->string, output_buffer)) + { + return false; + } + update_offset(output_buffer); + + length = (size_t)(output_buffer->format ? 2 : 1); + output_pointer = ensure(output_buffer, length); + if (output_pointer == NULL) + { + return false; + } + *output_pointer++ = ':'; + if (output_buffer->format) + { + *output_pointer++ = '\t'; + } + output_buffer->offset += length; + + /* print value */ + if (!print_value(current_item, output_buffer)) + { + return false; + } + update_offset(output_buffer); + + /* print comma if not last */ + length = (size_t)((output_buffer->format ? 1 : 0) + (current_item->next ? 1 : 0)); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) + { + return false; + } + if (current_item->next) + { + *output_pointer++ = ','; + } + + if (output_buffer->format) + { + *output_pointer++ = '\n'; + } + *output_pointer = '\0'; + output_buffer->offset += length; + + current_item = current_item->next; + } + + output_pointer = ensure(output_buffer, output_buffer->format ? 
(output_buffer->depth + 1) : 2); + if (output_pointer == NULL) + { + return false; + } + if (output_buffer->format) + { + size_t i; + for (i = 0; i < (output_buffer->depth - 1); i++) + { + *output_pointer++ = '\t'; + } + } + *output_pointer++ = '}'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; } /* Get Array size/item / object item. */ -int cJSON_GetArraySize(cJSON *array) {cJSON *c=array->child;int i=0;while(c)i++,c=c->next;return i;} -cJSON *cJSON_GetArrayItem(cJSON *array,int item) +CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array) +{ + cJSON *child = NULL; + size_t size = 0; + + if (array == NULL) + { + return 0; + } + + child = array->child; + + while (child != NULL) + { + size++; + child = child->next; + } + + /* FIXME: Can overflow here. Cannot be fixed without breaking the API */ + + return (int)size; +} + +static cJSON* get_array_item(const cJSON *array, size_t index) +{ + cJSON *current_child = NULL; + + if (array == NULL) + { + return NULL; + } + + current_child = array->child; + while ((current_child != NULL) && (index > 0)) + { + index--; + current_child = current_child->next; + } + + return current_child; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index) { - cJSON *c = (array != NULL) ? array->child : NULL; - while ((c != NULL) && (item > 0)) - { - item--; - c = c->next; - } + if (index < 0) + { + return NULL; + } - return c; + return get_array_item(array, (size_t)index); } -cJSON *cJSON_GetObjectItem(cJSON *object, const char *string) +static cJSON *get_object_item(const cJSON * const object, const char * const name, const cJSON_bool case_sensitive) { - cJSON *c = (object != NULL) ? 
object->child : NULL; - while ((c != NULL) && (cJSON_strcasecmp(c->string, string))) - { - c = c->next; - } - return c; + cJSON *current_element = NULL; + + if ((object == NULL) || (name == NULL)) + { + return NULL; + } + + current_element = object->child; + if (case_sensitive) + { + while ((current_element != NULL) && (strcmp(name, current_element->string) != 0)) + { + current_element = current_element->next; + } + } + else + { + while ((current_element != NULL) && (case_insensitive_strcmp((const unsigned char*)name, (const unsigned char*)(current_element->string)) != 0)) + { + current_element = current_element->next; + } + } + + return current_element; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string) +{ + return get_object_item(object, string, false); +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string) +{ + return get_object_item(object, string, true); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string) +{ + return cJSON_GetObjectItem(object, string) ? 1 : 0; } /* Utility for array list handling. */ -static void suffix_object(cJSON *prev,cJSON *item) {prev->next=item;item->prev=prev;} +static void suffix_object(cJSON *prev, cJSON *item) +{ + prev->next = item; + item->prev = prev; +} + /* Utility for handling references. 
*/ -static cJSON *create_reference(cJSON *item) {cJSON *ref=cJSON_New_Item();if (!ref) return 0;memcpy(ref,item,sizeof(cJSON));ref->string=0;ref->type|=cJSON_IsReference;ref->next=ref->prev=0;return ref;} +static cJSON *create_reference(const cJSON *item, const internal_hooks * const hooks) +{ + cJSON *reference = NULL; + if (item == NULL) + { + return NULL; + } + + reference = cJSON_New_Item(hooks); + if (reference == NULL) + { + return NULL; + } + + memcpy(reference, item, sizeof(cJSON)); + reference->string = NULL; + reference->type |= cJSON_IsReference; + reference->next = reference->prev = NULL; + return reference; +} + +static cJSON_bool add_item_to_array(cJSON *array, cJSON *item) +{ + cJSON *child = NULL; + + if ((item == NULL) || (array == NULL)) + { + return false; + } + + child = array->child; + + if (child == NULL) + { + /* list is empty, start new one */ + array->child = item; + } + else + { + /* append to the end */ + while (child->next) + { + child = child->next; + } + suffix_object(child, item); + } + + return true; +} /* Add item to array/object. 
*/ -void cJSON_AddItemToArray(cJSON *array, cJSON *item) {cJSON *c=array->child;if (!item) return; if (!c) {array->child=item;} else {while (c && c->next) c=c->next; suffix_object(c,item);}} -void cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item) {if (!item) return; if (item->string) cJSON_free(item->string);item->string=cJSON_strdup(string);cJSON_AddItemToArray(object,item);} -void cJSON_AddItemToObjectCS(cJSON *object,const char *string,cJSON *item) {if (!item) return; if (!(item->type&cJSON_StringIsConst) && item->string) cJSON_free(item->string);item->string=(char*)string;item->type|=cJSON_StringIsConst;cJSON_AddItemToArray(object,item);} -void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) {cJSON_AddItemToArray(array,create_reference(item));} -void cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item) {cJSON_AddItemToObject(object,string,create_reference(item));} - -cJSON *cJSON_DetachItemFromArray(cJSON *array,int which) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return 0; - if (c->prev) c->prev->next=c->next;if (c->next) c->next->prev=c->prev;if (c==array->child) array->child=c->next;c->prev=c->next=0;return c;} -void cJSON_DeleteItemFromArray(cJSON *array,int which) {cJSON_Delete(cJSON_DetachItemFromArray(array,which));} -cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string) {int i=0;cJSON *c=object->child;while (c && cJSON_strcasecmp(c->string,string)) i++,c=c->next;if (c) return cJSON_DetachItemFromArray(object,i);return 0;} -void cJSON_DeleteItemFromObject(cJSON *object,const char *string) {cJSON_Delete(cJSON_DetachItemFromObject(object,string));} +CJSON_PUBLIC(void) cJSON_AddItemToArray(cJSON *array, cJSON *item) +{ + add_item_to_array(array, item); +} + +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) +#pragma GCC diagnostic push +#endif +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wcast-qual" 
+#endif +/* helper function to cast away const */ +static void* cast_away_const(const void* string) +{ + return (void*)string; +} +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) +#pragma GCC diagnostic pop +#endif + + +static cJSON_bool add_item_to_object(cJSON * const object, const char * const string, cJSON * const item, const internal_hooks * const hooks, const cJSON_bool constant_key) +{ + char *new_key = NULL; + int new_type = cJSON_Invalid; + + if ((object == NULL) || (string == NULL) || (item == NULL)) + { + return false; + } + + if (constant_key) + { + new_key = (char*)cast_away_const(string); + new_type = item->type | cJSON_StringIsConst; + } + else + { + new_key = (char*)cJSON_strdup((const unsigned char*)string, hooks); + if (new_key == NULL) + { + return false; + } + + new_type = item->type & ~cJSON_StringIsConst; + } + + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) + { + hooks->deallocate(item->string); + } + + item->string = new_key; + item->type = new_type; + + return add_item_to_array(object, item); +} + +CJSON_PUBLIC(void) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item) +{ + add_item_to_object(object, string, item, &global_hooks, false); +} + +/* Add an item to an object with constant string as key */ +CJSON_PUBLIC(void) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item) +{ + add_item_to_object(object, string, item, &global_hooks, true); +} + +CJSON_PUBLIC(void) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) +{ + if (array == NULL) + { + return; + } + + add_item_to_array(array, create_reference(item, &global_hooks)); +} + +CJSON_PUBLIC(void) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item) +{ + if ((object == NULL) || (string == NULL)) + { + return; + } + + add_item_to_object(object, string, create_reference(item, &global_hooks), &global_hooks, false); +} + +CJSON_PUBLIC(cJSON*) 
cJSON_AddNullToObject(cJSON * const object, const char * const name) +{ + cJSON *null = cJSON_CreateNull(); + if (add_item_to_object(object, name, null, &global_hooks, false)) + { + return null; + } + + cJSON_Delete(null); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name) +{ + cJSON *true_item = cJSON_CreateTrue(); + if (add_item_to_object(object, name, true_item, &global_hooks, false)) + { + return true_item; + } + + cJSON_Delete(true_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name) +{ + cJSON *false_item = cJSON_CreateFalse(); + if (add_item_to_object(object, name, false_item, &global_hooks, false)) + { + return false_item; + } + + cJSON_Delete(false_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean) +{ + cJSON *bool_item = cJSON_CreateBool(boolean); + if (add_item_to_object(object, name, bool_item, &global_hooks, false)) + { + return bool_item; + } + + cJSON_Delete(bool_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number) +{ + cJSON *number_item = cJSON_CreateNumber(number); + if (add_item_to_object(object, name, number_item, &global_hooks, false)) + { + return number_item; + } + + cJSON_Delete(number_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string) +{ + cJSON *string_item = cJSON_CreateString(string); + if (add_item_to_object(object, name, string_item, &global_hooks, false)) + { + return string_item; + } + + cJSON_Delete(string_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw) +{ + cJSON *raw_item = cJSON_CreateRaw(raw); + if (add_item_to_object(object, name, raw_item, 
&global_hooks, false)) + { + return raw_item; + } + + cJSON_Delete(raw_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name) +{ + cJSON *object_item = cJSON_CreateObject(); + if (add_item_to_object(object, name, object_item, &global_hooks, false)) + { + return object_item; + } + + cJSON_Delete(object_item); + return NULL; +} + +CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name) +{ + cJSON *array = cJSON_CreateArray(); + if (add_item_to_object(object, name, array, &global_hooks, false)) + { + return array; + } + + cJSON_Delete(array); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item) +{ + if ((parent == NULL) || (item == NULL)) + { + return NULL; + } + + if (item->prev != NULL) + { + /* not the first element */ + item->prev->next = item->next; + } + if (item->next != NULL) + { + /* not the last element */ + item->next->prev = item->prev; + } + + if (item == parent->child) + { + /* first element */ + parent->child = item->next; + } + /* make sure the detached item doesn't point anywhere anymore */ + item->prev = NULL; + item->next = NULL; + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which) +{ + if (which < 0) + { + return NULL; + } + + return cJSON_DetachItemViaPointer(array, get_array_item(array, (size_t)which)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which) +{ + cJSON_Delete(cJSON_DetachItemFromArray(array, which)); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItem(object, string); + + return cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItemCaseSensitive(object, string); + + return 
cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObject(object, string)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObjectCaseSensitive(object, string)); +} /* Replace array/object items with new ones. */ -void cJSON_InsertItemInArray(cJSON *array,int which,cJSON *newitem) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) {cJSON_AddItemToArray(array,newitem);return;} - newitem->next=c;newitem->prev=c->prev;c->prev=newitem;if (c==array->child) array->child=newitem; else newitem->prev->next=newitem;} -void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return; - newitem->next=c->next;newitem->prev=c->prev;if (newitem->next) newitem->next->prev=newitem; - if (c==array->child) array->child=newitem; else newitem->prev->next=newitem;c->next=c->prev=0;cJSON_Delete(c);} -void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem){int i=0;cJSON *c=object->child;while(c && cJSON_strcasecmp(c->string,string))i++,c=c->next;if(c){newitem->string=cJSON_strdup(string);cJSON_ReplaceItemInArray(object,i,newitem);}} +CJSON_PUBLIC(void) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem) +{ + cJSON *after_inserted = NULL; + + if (which < 0) + { + return; + } + + after_inserted = get_array_item(array, (size_t)which); + if (after_inserted == NULL) + { + add_item_to_array(array, newitem); + return; + } + + newitem->next = after_inserted; + newitem->prev = after_inserted->prev; + after_inserted->prev = newitem; + if (after_inserted == array->child) + { + array->child = newitem; + } + else + { + newitem->prev->next = newitem; + } +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * 
replacement) +{ + if ((parent == NULL) || (replacement == NULL) || (item == NULL)) + { + return false; + } + + if (replacement == item) + { + return true; + } + + replacement->next = item->next; + replacement->prev = item->prev; + + if (replacement->next != NULL) + { + replacement->next->prev = replacement; + } + if (replacement->prev != NULL) + { + replacement->prev->next = replacement; + } + if (parent->child == item) + { + parent->child = replacement; + } + + item->next = NULL; + item->prev = NULL; + cJSON_Delete(item); + + return true; +} + +CJSON_PUBLIC(void) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem) +{ + if (which < 0) + { + return; + } + + cJSON_ReplaceItemViaPointer(array, get_array_item(array, (size_t)which), newitem); +} + +static cJSON_bool replace_item_in_object(cJSON *object, const char *string, cJSON *replacement, cJSON_bool case_sensitive) +{ + if ((replacement == NULL) || (string == NULL)) + { + return false; + } + + /* replace the name in the replacement */ + if (!(replacement->type & cJSON_StringIsConst) && (replacement->string != NULL)) + { + cJSON_free(replacement->string); + } + replacement->string = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + replacement->type &= ~cJSON_StringIsConst; + + cJSON_ReplaceItemViaPointer(object, get_object_item(object, string, case_sensitive), replacement); + + return true; +} + +CJSON_PUBLIC(void) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem) +{ + replace_item_in_object(object, string, newitem, false); +} + +CJSON_PUBLIC(void) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem) +{ + replace_item_in_object(object, string, newitem, true); +} /* Create basic types: */ -cJSON *cJSON_CreateNull(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_NULL;return item;} -cJSON *cJSON_CreateTrue(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_True;return item;} -cJSON 
*cJSON_CreateFalse(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_False;return item;} -cJSON *cJSON_CreateBool(int b) {cJSON *item=cJSON_New_Item();if(item)item->type=b?cJSON_True:cJSON_False;return item;} -cJSON *cJSON_CreateNumber(double num) {cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_Number;item->valuedouble=num;item->valueint=(int)num;}return item;} -cJSON *cJSON_CreateString(const char *string) {cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_String;item->valuestring=cJSON_strdup(string);}return item;} -cJSON *cJSON_CreateArray(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Array;return item;} -cJSON *cJSON_CreateObject(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Object;return item;} +CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_NULL; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_True; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool b) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = b ? 
cJSON_True : cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Number; + item->valuedouble = num; + + /* use saturation in case of overflow */ + if (num >= INT_MAX) + { + item->valueint = INT_MAX; + } + else if (num <= INT_MIN) + { + item->valueint = INT_MIN; + } + else + { + item->valueint = (int)num; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_String; + item->valuestring = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + if (!item->valuestring) + { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) + { + item->type = cJSON_String | cJSON_IsReference; + item->valuestring = (char*)cast_away_const(string); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Object | cJSON_IsReference; + item->child = (cJSON*)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child) { + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Array | cJSON_IsReference; + item->child = (cJSON*)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Raw; + item->valuestring = (char*)cJSON_strdup((const unsigned char*)raw, &global_hooks); + if (!item->valuestring) + { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void) +{ + cJSON 
*item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Array; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) + { + item->type = cJSON_Object; + } + + return item; +} /* Create Arrays: */ -cJSON *cJSON_CreateIntArray(const int *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} -cJSON *cJSON_CreateFloatArray(const float *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} -cJSON *cJSON_CreateDoubleArray(const double *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} -cJSON *cJSON_CreateStringArray(const char **strings,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} +CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber(numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber((double)numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) 
cJSON_CreateDoubleArray(const double *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateNumber(numbers[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (strings == NULL)) + { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) + { + n = cJSON_CreateString(strings[i]); + if (!n) + { + cJSON_Delete(a); + return NULL; + } + if (!i) + { + a->child = n; + } + else + { + suffix_object(p, n); + } + p = n; + } + + return a; +} /* Duplication */ -cJSON *cJSON_Duplicate(cJSON *item,int recurse) -{ - cJSON *newitem,*cptr,*nptr=0,*newchild; - /* Bail on bad ptr */ - if (!item) return 0; - /* Create new item */ - newitem=cJSON_New_Item(); - if (!newitem) return 0; - /* Copy over all vars */ - newitem->type=item->type&(~cJSON_IsReference),newitem->valueint=item->valueint,newitem->valuedouble=item->valuedouble; - if (item->valuestring) {newitem->valuestring=cJSON_strdup(item->valuestring); if (!newitem->valuestring) {cJSON_Delete(newitem);return 0;}} - if (item->string) {newitem->string=cJSON_strdup(item->string); if (!newitem->string) {cJSON_Delete(newitem);return 0;}} - /* If non-recursive, then we're done! */ - if (!recurse) return newitem; - /* Walk the ->next chain for the child. 
*/ - cptr=item->child; - while (cptr) - { - newchild=cJSON_Duplicate(cptr,1); /* Duplicate (with recurse) each item in the ->next chain */ - if (!newchild) {cJSON_Delete(newitem);return 0;} - if (nptr) {nptr->next=newchild,newchild->prev=nptr;nptr=newchild;} /* If newitem->child already set, then crosswire ->prev and ->next and move on */ - else {newitem->child=newchild;nptr=newchild;} /* Set newitem->child and move to it */ - cptr=cptr->next; - } - return newitem; -} - -void cJSON_Minify(char *json) -{ - char *into=json; - while (*json) - { - if (*json==' ') json++; - else if (*json=='\t') json++; /* Whitespace characters. */ - else if (*json=='\r') json++; - else if (*json=='\n') json++; - else if (*json=='/' && json[1]=='/') while (*json && *json!='\n') json++; /* double-slash comments, to end of line. */ - else if (*json=='/' && json[1]=='*') {while (*json && !(*json=='*' && json[1]=='/')) json++;json+=2;} /* multiline comments. */ - else if (*json=='\"'){*into++=*json++;while (*json && *json!='\"'){if (*json=='\\') *into++=*json++;*into++=*json++;}*into++=*json++;} /* string literals, which are \" sensitive. */ - else *into++=*json++; /* All other characters. */ - } - *into=0; /* and null-terminate. */ +CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse) +{ + cJSON *newitem = NULL; + cJSON *child = NULL; + cJSON *next = NULL; + cJSON *newchild = NULL; + + /* Bail on bad ptr */ + if (!item) + { + goto fail; + } + /* Create new item */ + newitem = cJSON_New_Item(&global_hooks); + if (!newitem) + { + goto fail; + } + /* Copy over all vars */ + newitem->type = item->type & (~cJSON_IsReference); + newitem->valueint = item->valueint; + newitem->valuedouble = item->valuedouble; + if (item->valuestring) + { + newitem->valuestring = (char*)cJSON_strdup((unsigned char*)item->valuestring, &global_hooks); + if (!newitem->valuestring) + { + goto fail; + } + } + if (item->string) + { + newitem->string = (item->type&cJSON_StringIsConst) ? 
item->string : (char*)cJSON_strdup((unsigned char*)item->string, &global_hooks); + if (!newitem->string) + { + goto fail; + } + } + /* If non-recursive, then we're done! */ + if (!recurse) + { + return newitem; + } + /* Walk the ->next chain for the child. */ + child = item->child; + while (child != NULL) + { + newchild = cJSON_Duplicate(child, true); /* Duplicate (with recurse) each item in the ->next chain */ + if (!newchild) + { + goto fail; + } + if (next != NULL) + { + /* If newitem->child already set, then crosswire ->prev and ->next and move on */ + next->next = newchild; + newchild->prev = next; + next = newchild; + } + else + { + /* Set newitem->child and move to it */ + newitem->child = newchild; + next = newchild; + } + child = child->next; + } + + return newitem; + +fail: + if (newitem != NULL) + { + cJSON_Delete(newitem); + } + + return NULL; +} + +CJSON_PUBLIC(void) cJSON_Minify(char *json) +{ + unsigned char *into = (unsigned char*)json; + + if (json == NULL) + { + return; + } + + while (*json) + { + if (*json == ' ') + { + json++; + } + else if (*json == '\t') + { + /* Whitespace characters. */ + json++; + } + else if (*json == '\r') + { + json++; + } + else if (*json == '\n') + { + json++; + } + else if ((*json == '/') && (json[1] == '/')) + { + /* double-slash comments, to end of line. */ + while (*json && (*json != '\n')) + { + json++; + } + } + else if ((*json == '/') && (json[1] == '*')) + { + /* multiline comments. */ + while (*json && !((*json == '*') && (json[1] == '/'))) + { + json++; + } + json += 2; + } + else if (*json == '\"') + { + /* string literals, which are \" sensitive. */ + *into++ = (unsigned char)*json++; + while (*json && (*json != '\"')) + { + if (*json == '\\') + { + *into++ = (unsigned char)*json++; + } + *into++ = (unsigned char)*json++; + } + *into++ = (unsigned char)*json++; + } + else + { + /* All other characters. */ + *into++ = (unsigned char)*json++; + } + } + + /* and null-terminate. 
*/ + *into = '\0'; } +CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Invalid; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_False; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xff) == cJSON_True; +} + + +CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & (cJSON_True | cJSON_False)) != 0; +} +CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_NULL; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Number; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_String; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Array; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Object; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item) +{ + if (item == NULL) + { + return false; + } + + return (item->type & 0xFF) == cJSON_Raw; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive) +{ + if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF)) || cJSON_IsInvalid(a)) + { + return false; + } + + /* check if type is valid */ + switch (a->type & 0xFF) + { + case 
cJSON_False: + case cJSON_True: + case cJSON_NULL: + case cJSON_Number: + case cJSON_String: + case cJSON_Raw: + case cJSON_Array: + case cJSON_Object: + break; + + default: + return false; + } + + /* identical objects are equal */ + if (a == b) + { + return true; + } + + switch (a->type & 0xFF) + { + /* in these cases and equal type is enough */ + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + return true; + + case cJSON_Number: + if (a->valuedouble == b->valuedouble) + { + return true; + } + return false; + + case cJSON_String: + case cJSON_Raw: + if ((a->valuestring == NULL) || (b->valuestring == NULL)) + { + return false; + } + if (strcmp(a->valuestring, b->valuestring) == 0) + { + return true; + } + + return false; + + case cJSON_Array: + { + cJSON *a_element = a->child; + cJSON *b_element = b->child; + + for (; (a_element != NULL) && (b_element != NULL);) + { + if (!cJSON_Compare(a_element, b_element, case_sensitive)) + { + return false; + } + + a_element = a_element->next; + b_element = b_element->next; + } + + /* one of the arrays is longer than the other */ + if (a_element != b_element) { + return false; + } + + return true; + } + + case cJSON_Object: + { + cJSON *a_element = NULL; + cJSON *b_element = NULL; + cJSON_ArrayForEach(a_element, a) + { + /* TODO This has O(n^2) runtime, which is horrible! 
*/ + b_element = get_object_item(b, a_element->string, case_sensitive); + if (b_element == NULL) + { + return false; + } + + if (!cJSON_Compare(a_element, b_element, case_sensitive)) + { + return false; + } + } + + /* doing this twice, once on a and b to prevent true comparison if a subset of b + * TODO: Do this the proper way, this is just a fix for now */ + cJSON_ArrayForEach(b_element, b) + { + a_element = get_object_item(a, b_element->string, case_sensitive); + if (a_element == NULL) + { + return false; + } + + if (!cJSON_Compare(b_element, a_element, case_sensitive)) + { + return false; + } + } + + return true; + } + + default: + return false; + } +} + +CJSON_PUBLIC(void *) cJSON_malloc(size_t size) +{ + return global_hooks.allocate(size); +} + +CJSON_PUBLIC(void) cJSON_free(void *object) +{ + global_hooks.deallocate(object); +} \ No newline at end of file diff --git a/source/code/cjson/cJSON.h b/source/code/cjson/cJSON.h index 662948612..d4a2dfed3 100644 --- a/source/code/cjson/cJSON.h +++ b/source/code/cjson/cJSON.h @@ -1,147 +1,285 @@ /* - Copyright (c) 2009 Dave Gamble - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. +Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. */ #ifndef cJSON__h #define cJSON__h + #ifdef __cplusplus extern "C" { #endif -/* cJSON Types: */ -#define cJSON_False 0 -#define cJSON_True 1 -#define cJSON_NULL 2 -#define cJSON_Number 3 -#define cJSON_String 4 -#define cJSON_Array 5 -#define cJSON_Object 6 - +#if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) +#define __WINDOWS__ +#endif + +#ifdef __WINDOWS__ + + /* When compiling for windows, we specify a specific calling convention to avoid issues where we are being called from a project with a different default calling convention. 
For windows you have 3 define options: + + CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever dllexport symbols + CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport symbols (default) + CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbol + + For *nix builds that support visibility attribute, you can define similar behavior by + + setting default visibility to hidden by adding + -fvisibility=hidden (for gcc) + or + -xldscope=hidden (for sun cc) + to CFLAGS + + then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJSON_EXPORT_SYMBOLS does + + */ + +#define CJSON_CDECL __cdecl +#define CJSON_STDCALL __stdcall + + /* export symbols by default, this is necessary for copy pasting the C and header file */ +#if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) && !defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_EXPORT_SYMBOLS +#endif + +#if defined(CJSON_HIDE_SYMBOLS) +#define CJSON_PUBLIC(type) type CJSON_STDCALL +#elif defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllexport) type CJSON_STDCALL +#elif defined(CJSON_IMPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllimport) type CJSON_STDCALL +#endif +#else /* !__WINDOWS__ */ +#define CJSON_CDECL +#define CJSON_STDCALL + +#if (defined(__GNUC__) || defined(__SUNPRO_CC) || defined (__SUNPRO_C)) && defined(CJSON_API_VISIBILITY) +#define CJSON_PUBLIC(type) __attribute__((visibility("default"))) type +#else +#define CJSON_PUBLIC(type) type +#endif +#endif + + /* project version */ +#define CJSON_VERSION_MAJOR 1 +#define CJSON_VERSION_MINOR 7 +#define CJSON_VERSION_PATCH 8 + +#include + + /* cJSON Types: */ +#define cJSON_Invalid (0) +#define cJSON_False (1 << 0) +#define cJSON_True (1 << 1) +#define cJSON_NULL (1 << 2) +#define cJSON_Number (1 << 3) +#define cJSON_String (1 << 4) +#define cJSON_Array (1 << 5) +#define cJSON_Object (1 << 6) +#define cJSON_Raw (1 << 7) /* raw json */ + #define 
cJSON_IsReference 256 #define cJSON_StringIsConst 512 -/* The cJSON structure: */ - typedef struct cJSON { - struct cJSON *next,*prev; /* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */ - struct cJSON *child; /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ - - int type; /* The type of the item, as above. */ - - char *valuestring; /* The item's string, if type==cJSON_String */ - int valueint; /* The item's number, if type==cJSON_Number */ - double valuedouble; /* The item's number, if type==cJSON_Number */ - - char *string; /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */ - } cJSON; - - typedef struct cJSON_Hooks { - void *(*malloc_fn)(size_t sz); - void (*free_fn)(void *ptr); - } cJSON_Hooks; - -/* Supply malloc, realloc and free functions to cJSON */ - extern void cJSON_InitHooks(cJSON_Hooks* hooks); - - -/* Supply a block of JSON, and this returns a cJSON object you can interrogate. Call cJSON_Delete when finished. */ - extern cJSON *cJSON_Parse(const char *value); -/* Render a cJSON entity to text for transfer/storage. Free the char* when finished. */ - extern char *cJSON_Print(cJSON *item); -/* Render a cJSON entity to text for transfer/storage without any formatting. Free the char* when finished. */ - extern char *cJSON_PrintUnformatted(cJSON *item); -/* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. fmt=0 gives unformatted, =1 gives formatted */ - extern char *cJSON_PrintBuffered(cJSON *item,int prebuffer,int fmt); -/* Delete a cJSON entity and all subentities. */ - extern void cJSON_Delete(cJSON *c); - -/* Returns the number of items in an array (or object). */ - extern int cJSON_GetArraySize(cJSON *array); -/* Retrieve item number "item" from array "array". Returns NULL if unsuccessful. 
*/ - extern cJSON *cJSON_GetArrayItem(cJSON *array,int item); -/* Get item "string" from object. Case insensitive. */ - extern cJSON *cJSON_GetObjectItem(cJSON *object,const char *string); - -/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */ - extern const char *cJSON_GetErrorPtr(void); - -/* These calls create a cJSON item of the appropriate type. */ - extern cJSON *cJSON_CreateNull(void); - extern cJSON *cJSON_CreateTrue(void); - extern cJSON *cJSON_CreateFalse(void); - extern cJSON *cJSON_CreateBool(int b); - extern cJSON *cJSON_CreateNumber(double num); - extern cJSON *cJSON_CreateString(const char *string); - extern cJSON *cJSON_CreateArray(void); - extern cJSON *cJSON_CreateObject(void); - -/* These utilities create an Array of count items. */ - extern cJSON *cJSON_CreateIntArray(const int *numbers,int count); - extern cJSON *cJSON_CreateFloatArray(const float *numbers,int count); - extern cJSON *cJSON_CreateDoubleArray(const double *numbers,int count); - extern cJSON *cJSON_CreateStringArray(const char **strings,int count); - -/* Append item to the specified array/object. */ - extern void cJSON_AddItemToArray(cJSON *array, cJSON *item); - extern void cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item); - extern void cJSON_AddItemToObjectCS(cJSON *object,const char *string,cJSON *item); /* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object */ -/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. 
*/ - extern void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); - extern void cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item); - -/* Remove/Detatch items from Arrays/Objects. */ - extern cJSON *cJSON_DetachItemFromArray(cJSON *array,int which); - extern void cJSON_DeleteItemFromArray(cJSON *array,int which); - extern cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string); - extern void cJSON_DeleteItemFromObject(cJSON *object,const char *string); - -/* Update array items. */ - extern void cJSON_InsertItemInArray(cJSON *array,int which,cJSON *newitem); /* Shifts pre-existing items to the right. */ - extern void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem); - extern void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem); - -/* Duplicate a cJSON item */ - extern cJSON *cJSON_Duplicate(cJSON *item,int recurse); -/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will - need to be released. With recurse!=0, it will duplicate any children connected to the item. - The item->next and ->prev pointers are always zero on return from Duplicate. */ - -/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */ - extern cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated); - - extern void cJSON_Minify(char *json); - -/* Macros for creating things quickly. 
*/ -#define cJSON_AddNullToObject(object,name) cJSON_AddItemToObject(object, name, cJSON_CreateNull()) -#define cJSON_AddTrueToObject(object,name) cJSON_AddItemToObject(object, name, cJSON_CreateTrue()) -#define cJSON_AddFalseToObject(object,name) cJSON_AddItemToObject(object, name, cJSON_CreateFalse()) -#define cJSON_AddBoolToObject(object,name,b) cJSON_AddItemToObject(object, name, cJSON_CreateBool(b)) -#define cJSON_AddNumberToObject(object,name,n) cJSON_AddItemToObject(object, name, cJSON_CreateNumber(n)) -#define cJSON_AddStringToObject(object,name,s) cJSON_AddItemToObject(object, name, cJSON_CreateString(s)) - -/* When assigning an integer value, it needs to be propagated to valuedouble too. */ -#define cJSON_SetIntValue(object,val) ((object)?(object)->valueint=(object)->valuedouble=(val):(val)) -#define cJSON_SetNumberValue(object,val) ((object)?(object)->valueint=(object)->valuedouble=(val):(val)) + /* The cJSON structure: */ + typedef struct cJSON + { + /* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */ + struct cJSON *next; + struct cJSON *prev; + /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ + struct cJSON *child; + + /* The type of the item, as above. */ + int type; + + /* The item's string, if type==cJSON_String and type == cJSON_Raw */ + char *valuestring; + /* writing to valueint is DEPRECATED, use cJSON_SetNumberValue instead */ + int valueint; + /* The item's number, if type==cJSON_Number */ + double valuedouble; + + /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */ + char *string; + } cJSON; + + typedef struct cJSON_Hooks + { + /* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. 
*/ + void *(CJSON_CDECL *malloc_fn)(size_t sz); + void (CJSON_CDECL *free_fn)(void *ptr); + } cJSON_Hooks; + + typedef int cJSON_bool; + + /* Limits how deeply nested arrays/objects can be before cJSON rejects to parse them. + * This is to prevent stack overflows. */ +#ifndef CJSON_NESTING_LIMIT +#define CJSON_NESTING_LIMIT 1000 +#endif + + /* returns the version of cJSON as a string */ + CJSON_PUBLIC(const char*) cJSON_Version(void); + + /* Supply malloc, realloc and free functions to cJSON */ + CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks); + + /* Memory Management: the caller is always responsible to free the results from all variants of cJSON_Parse (with cJSON_Delete) and cJSON_Print (with stdlib free, cJSON_Hooks.free_fn, or cJSON_free as appropriate). The exception is cJSON_PrintPreallocated, where the caller has full responsibility of the buffer. */ + /* Supply a block of JSON, and this returns a cJSON object you can interrogate. */ + CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value); + /* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */ + /* If you supply a ptr in return_parse_end and parsing fails, then return_parse_end will contain a pointer to the error so will match cJSON_GetErrorPtr(). */ + CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated); + + /* Render a cJSON entity to text for transfer/storage. */ + CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item); + /* Render a cJSON entity to text for transfer/storage without any formatting. */ + CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item); + /* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. 
fmt=0 gives unformatted, =1 gives formatted */ + CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt); + /* Render a cJSON entity to text using a buffer already allocated in memory with given length. Returns 1 on success and 0 on failure. */ + /* NOTE: cJSON is not always 100% accurate in estimating how much memory it will use, so to be safe allocate 5 bytes more than you actually need */ + CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format); + /* Delete a cJSON entity and all subentities. */ + CJSON_PUBLIC(void) cJSON_Delete(cJSON *c); + + /* Returns the number of items in an array (or object). */ + CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array); + /* Retrieve item number "index" from array "array". Returns NULL if unsuccessful. */ + CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index); + /* Get item "string" from object. Case insensitive. */ + CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string); + CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string); + CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string); + /* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. 
*/ + CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void); + + /* Check if the item is a string and return its valuestring */ + CJSON_PUBLIC(char *) cJSON_GetStringValue(cJSON *item); + + /* These functions check the type of an item */ + CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item); + CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item); + + /* These calls create a cJSON item of the appropriate type. */ + CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean); + CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num); + CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string); + /* raw json */ + CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw); + CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void); + CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void); + + /* Create a string where valuestring references a string so + * it will not be freed by cJSON_Delete */ + CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string); + /* Create an object/arrray that only references it's elements so + * they will not be freed by cJSON_Delete */ + CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child); + CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child); + + /* These utilities create an Array of count items. 
*/ + CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count); + CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count); + CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count); + CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char **strings, int count); + + /* Append item to the specified array/object. */ + CJSON_PUBLIC(void) cJSON_AddItemToArray(cJSON *array, cJSON *item); + CJSON_PUBLIC(void) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item); + /* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object. + * WARNING: When this function was used, make sure to always check that (item->type & cJSON_StringIsConst) is zero before + * writing to `item->string` */ + CJSON_PUBLIC(void) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item); + /* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */ + CJSON_PUBLIC(void) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); + CJSON_PUBLIC(void) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item); + + /* Remove/Detatch items from Arrays/Objects. */ + CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item); + CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which); + CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which); + CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string); + CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string); + CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string); + CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string); + + /* Update array items. 
*/ + CJSON_PUBLIC(void) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem); /* Shifts pre-existing items to the right. */ + CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement); + CJSON_PUBLIC(void) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem); + CJSON_PUBLIC(void) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem); + CJSON_PUBLIC(void) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem); + + /* Duplicate a cJSON item */ + CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse); + /* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will + need to be released. With recurse!=0, it will duplicate any children connected to the item. + The item->next and ->prev pointers are always zero on return from Duplicate. */ + /* Recursively compare two cJSON items for equality. If either a or b is NULL or invalid, they will be considered unequal. + * case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */ + CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive); + + + CJSON_PUBLIC(void) cJSON_Minify(char *json); + + /* Helper functions for creating and adding items to an object at the same time. + * They return the added item or NULL on failure. 
*/ + CJSON_PUBLIC(cJSON*) cJSON_AddNullToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean); + CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number); + CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string); + CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw); + CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name); + CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name); + + /* When assigning an integer value, it needs to be propagated to valuedouble too. */ +#define cJSON_SetIntValue(object, number) ((object) ? (object)->valueint = (object)->valuedouble = (number) : (number)) + /* helper for the cJSON_SetNumberValue macro */ + CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number); +#define cJSON_SetNumberValue(object, number) ((object != NULL) ? cJSON_SetNumberHelper(object, (double)number) : (number)) + + /* Macro for iterating over an array or object */ +#define cJSON_ArrayForEach(element, array) for(element = (array != NULL) ? 
(array)->child : NULL; element != NULL; element = element->next) + + /* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */ + CJSON_PUBLIC(void *) cJSON_malloc(size_t size); + CJSON_PUBLIC(void) cJSON_free(void *object); #ifdef __cplusplus } #endif -#endif + +#endif \ No newline at end of file diff --git a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp index 7fdd746a1..68c13053a 100644 --- a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp +++ b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp @@ -103,11 +103,11 @@ class ContainerQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { cJSON* tags = cJSON_GetObjectItem(entry, "RepoTags"); - if (tags && cJSON_GetArraySize(tags)) + if ((tags != NULL) && cJSON_GetArraySize(tags)) { string value = ""; cJSON* arrItem = cJSON_GetArrayItem(tags, 0); @@ -168,7 +168,7 @@ class ContainerQuery try { cJSON* config = cJSON_GetObjectItem(entry, "Config"); - if (config) + if (config != NULL) { // Hostname of container string hostnamevalue = ""; @@ -232,11 +232,11 @@ class ContainerQuery // Compose group instance.ComposeGroup_value(""); - if (labels) + if (labels != NULL) { cJSON* groupName = cJSON_GetObjectItem(labels, "com.docker.compose.project"); - if (groupName) + if (groupName != NULL) { instance.ComposeGroup_value(groupName->valuestring); } @@ -244,7 +244,10 @@ class ContainerQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id") != NULL) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) @@ -268,7 +271,7 @@ 
class ContainerQuery try { cJSON* state = cJSON_GetObjectItem(entry, "State"); - if (state) + if (state != NULL) { cJSON* objItem = cJSON_GetObjectItem(state, "ExitCode"); if (objItem != NULL) @@ -278,7 +281,10 @@ class ContainerQuery if (exitCode < 0) { exitCode = 128; - syslog(LOG_NOTICE, "Container %s returned negative exit code", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id") != NULL) + { + syslog(LOG_NOTICE, "Container %s returned negative exit code", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } instance.ExitCode_value(exitCode); @@ -328,7 +334,10 @@ class ContainerQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id")) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) @@ -352,7 +361,7 @@ class ContainerQuery try { cJSON* hostConfig = cJSON_GetObjectItem(entry, "HostConfig"); - if (hostConfig) + if (hostConfig != NULL) { // Links cJSON* objItem = cJSON_GetObjectItem(hostConfig, "Links"); @@ -372,7 +381,10 @@ class ContainerQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id")) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) diff --git a/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp b/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp index c43057ec7..08b68b1d8 100644 --- a/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp +++ 
b/source/code/providers/Container_ContainerStatistics_Class_Provider.cpp @@ -34,17 +34,17 @@ class StatsQuery int totalRx = 0; int totalTx = 0; - if (stats) + if (stats != NULL) { cJSON* network = cJSON_GetObjectItem(stats, "networks"); - if (network) + if (network != NULL) { // Docker 1.9+ network = network->child; // Sum the number of bytes from each NIC if there is more than one - while (network) + while (network != NULL) { cJSON* objItem = cJSON_GetObjectItem(network, "rx_bytes"); if (objItem != NULL) { @@ -66,7 +66,7 @@ class StatsQuery { // Docker 1.8.x network = cJSON_GetObjectItem(stats, "network"); - if (network) + if (network != NULL) { cJSON* objItem = cJSON_GetObjectItem(network, "rx_bytes"); if (objItem != NULL) { @@ -110,7 +110,7 @@ class StatsQuery static void TrySetContainerMemoryData(Container_ContainerStatistics_Class& instance, cJSON* stats) { try { - if (stats) + if (stats != NULL) { cJSON* memory_stats = cJSON_GetObjectItem(stats, "memory_stats"); if (memory_stats != NULL) { @@ -150,27 +150,27 @@ class StatsQuery instance.DiskBytesRead_value(0); instance.DiskBytesWritten_value(0); - if (stats) + if (stats != NULL) { cJSON* blkio_stats = cJSON_GetObjectItem(stats, "blkio_stats"); - if (blkio_stats) + if (blkio_stats != NULL) { cJSON* values = cJSON_GetObjectItem(blkio_stats, "io_service_bytes_recursive"); bool readFlag = false; bool writeFlag = false; - for (int i = 0; values && !(readFlag && writeFlag) && i < cJSON_GetArraySize(values); i++) + for (int i = 0; values != NULL && !(readFlag && writeFlag) && i < cJSON_GetArraySize(values); i++) { cJSON* entry = cJSON_GetArrayItem(values, i); - if (entry) + if (entry != NULL) { cJSON* op = cJSON_GetObjectItem(entry, "op"); cJSON* rawValue = cJSON_GetObjectItem(entry, "value"); - if (op && rawValue) + if ((op != NULL) && (rawValue != NULL)) { if (!strcmp(op->valuestring, "Read")) { @@ -215,15 +215,15 @@ class StatsQuery result["system"] = 0; try { - if (stats) + if (stats != NULL) { cJSON* cpu_stats 
= cJSON_GetObjectItem(stats, "cpu_stats"); - if (cpu_stats) + if (cpu_stats != NULL) { cJSON* cpu_usage = cJSON_GetObjectItem(cpu_stats, "cpu_usage"); - if (cpu_usage) + if (cpu_usage != NULL) { cJSON* objItem = cJSON_GetObjectItem(cpu_usage, "total_usage"); if (objItem != NULL) { @@ -269,15 +269,15 @@ class StatsQuery instance.CPUTotal_value(0); instance.CPUTotalPct_value(0); - if (stats) + if (stats != NULL) { cJSON* cpu_stats = cJSON_GetObjectItem(stats, "cpu_stats"); - if (cpu_stats) + if (cpu_stats != NULL) { cJSON* cpu_usage = cJSON_GetObjectItem(cpu_stats, "cpu_usage"); - if (cpu_usage) + if (cpu_usage != NULL) { cJSON* totalUsageItem = cJSON_GetObjectItem(cpu_usage, "total_usage"); cJSON* systemCpuUsageItem = cJSON_GetObjectItem(cpu_stats, "system_cpu_usage"); @@ -333,7 +333,7 @@ class StatsQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { // New perf entry Container_ContainerStatistics_Class instance; @@ -396,7 +396,10 @@ class StatsQuery // See http://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#get-container-stats-based-on-resource-usage for example output if (!subResponse.empty() && subResponse[0]) { - TrySetContainerCpuData(result[i], subResponse[0], previousStatsList[i]); + if (i < previousStatsList.size()) + { + TrySetContainerCpuData(result[i], subResponse[0], previousStatsList[i]); + } // Set container name in 'InstanceName' field of Perf data. 
result[i].InstanceID_value(result[i].ElementName_value()); diff --git a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp index d5d2ce6f2..bf2ab3b53 100644 --- a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp +++ b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp @@ -137,11 +137,11 @@ class EventQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { cJSON* nameField = cJSON_GetObjectItem(entry, "Names"); - if (nameField && cJSON_GetArraySize(nameField)) + if ((nameField != NULL) && cJSON_GetArraySize(nameField)) { // Docker API documentation says that this field contains the short ID but that is not the case; use full ID instead cJSON* objItem = cJSON_GetObjectItem(entry, "Id"); @@ -239,7 +239,7 @@ class EventQuery cJSON* entry = cJSON_GetArrayItem(response[0], i); // the newer versions of the API may return objects that do not have status or id - if (entry && cJSON_GetObjectItem(entry, "status") != NULL && cJSON_GetObjectItem(entry, "id") != NULL) + if ((entry != NULL) && cJSON_GetObjectItem(entry, "status") != NULL && cJSON_GetObjectItem(entry, "id") != NULL) { // New inventory entry Container_DaemonEvent_Class instance; diff --git a/source/code/providers/Container_ImageInventory_Class_Provider.cpp b/source/code/providers/Container_ImageInventory_Class_Provider.cpp index 3cc088683..01d1c639c 100644 --- a/source/code/providers/Container_ImageInventory_Class_Provider.cpp +++ b/source/code/providers/Container_ImageInventory_Class_Provider.cpp @@ -35,7 +35,7 @@ class InventoryQuery string result = ""; try { - if (tags && cJSON_GetArraySize(tags)) + if ((tags != NULL) && cJSON_GetArraySize(tags)) { bool flag = false; @@ -164,7 +164,7 @@ class InventoryQuery try { cJSON* state = cJSON_GetObjectItem(entry, "State"); - if (state) + if (state != NULL) { cJSON* objItem = cJSON_GetObjectItem(entry, "Image"); if (objItem != NULL) 
@@ -173,10 +173,10 @@ class InventoryQuery { string id = string(objItem->valuestring); - if (cJSON_GetObjectItem(state, "Running")->valueint) + if (cJSON_GetObjectItem(state, "Running") != NULL && cJSON_GetObjectItem(state, "Running")->valueint) { // Running container - if (cJSON_GetObjectItem(state, "Paused")->valueint) + if (cJSON_GetObjectItem(state, "Paused") != NULL && cJSON_GetObjectItem(state, "Paused")->valueint) { // Paused container instances[idTable[id]].Paused_value(instances[idTable[id]].Paused_value() + 1); @@ -188,7 +188,7 @@ class InventoryQuery } else { - if (cJSON_GetObjectItem(state, "ExitCode")->valueint) + if (cJSON_GetObjectItem(state, "ExitCode") != NULL && cJSON_GetObjectItem(state, "ExitCode")->valueint) { // Container exited nonzero instances[idTable[id]].Failed_value(instances[idTable[id]].Failed_value() + 1); @@ -206,7 +206,10 @@ class InventoryQuery } else { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + if (cJSON_GetObjectItem(entry, "Id") != NULL) + { + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + } } } catch (std::exception &e) @@ -239,7 +242,7 @@ class InventoryQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { cJSON* objItem = cJSON_GetObjectItem(entry, "Id"); if (objItem != NULL) @@ -321,7 +324,7 @@ class InventoryQuery { cJSON* entry = cJSON_GetArrayItem(response[0], i); - if (entry) + if (entry != NULL) { // New inventory entry Container_ImageInventory_Class instance; diff --git a/source/code/providers/Container_Process_Class_Provider.cpp b/source/code/providers/Container_Process_Class_Provider.cpp index 76b15bdfc..9adc4edcd 100644 --- a/source/code/providers/Container_Process_Class_Provider.cpp +++ b/source/code/providers/Container_Process_Class_Provider.cpp @@ 
-55,7 +55,7 @@ class ContainerProcessQuery for (int i = 0; i < cJSON_GetArraySize(dockerPsResponse[0]); i++) { cJSON* containerEntry = cJSON_GetArrayItem(dockerPsResponse[0], i); - if (containerEntry) + if (containerEntry != NULL) { cJSON* objItem = cJSON_GetObjectItem(containerEntry, "Id"); if (objItem != NULL) From 4b630215824d85d568fd384b1bbee071996bec1a Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 27 Sep 2018 16:10:59 -0700 Subject: [PATCH 12/88] Adding a missed null check (#135) --- .../code/providers/Container_DaemonEvent_Class_Provider.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp index bf2ab3b53..51e253d73 100644 --- a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp +++ b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp @@ -289,7 +289,10 @@ class EventQuery else { // Image event - instance.ElementName_value(cJSON_GetObjectItem(entry, "id")->valuestring); + if (cJSON_GetObjectItem(entry, "id") != NULL) + { + instance.ElementName_value(cJSON_GetObjectItem(entry, "id")->valuestring); + } instance.Id_value(""); instance.ContainerName_value(""); } From 8b964fd7ee54948b7374ed44f3253d0d89ceb443 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 27 Sep 2018 17:01:04 -0700 Subject: [PATCH 13/88] reusing some variables (#136) --- ...iner_ContainerInventory_Class_Provider.cpp | 26 ++++++++++++------- .../Container_DaemonEvent_Class_Provider.cpp | 5 ++-- ...ontainer_ImageInventory_Class_Provider.cpp | 16 +++++++----- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp index 68c13053a..ded8fb869 100644 --- a/source/code/providers/Container_ContainerInventory_Class_Provider.cpp +++ 
b/source/code/providers/Container_ContainerInventory_Class_Provider.cpp @@ -210,7 +210,11 @@ class ContainerQuery correctedstring = stringToTruncate + "\"]"; } instance.EnvironmentVar_value(correctedstring.c_str()); - syslog(LOG_WARNING, "Environment variable truncated for container %s", cJSON_GetObjectItem(entry, "Id")->valuestring); + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) + { + syslog(LOG_WARNING, "Environment variable truncated for container %s", idItem->valuestring); + } } else { instance.EnvironmentVar_value(strcmp(env, "null") ? env : ""); @@ -244,9 +248,10 @@ class ContainerQuery } else { - if (cJSON_GetObjectItem(entry, "Id") != NULL) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerConfig to get container %s config information returned null", idItem->valuestring); } } } @@ -281,9 +286,10 @@ class ContainerQuery if (exitCode < 0) { exitCode = 128; - if (cJSON_GetObjectItem(entry, "Id") != NULL) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_NOTICE, "Container %s returned negative exit code", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_NOTICE, "Container %s returned negative exit code", idItem->valuestring); } } @@ -334,9 +340,10 @@ class ContainerQuery } else { - if (cJSON_GetObjectItem(entry, "Id")) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem) { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", idItem->valuestring); } } } @@ -381,9 +388,10 @@ class ContainerQuery } else { - if 
(cJSON_GetObjectItem(entry, "Id")) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerHostConfig to get container %s host config information returned null", idItem->valuestring); } } } diff --git a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp index 51e253d73..0c28e4769 100644 --- a/source/code/providers/Container_DaemonEvent_Class_Provider.cpp +++ b/source/code/providers/Container_DaemonEvent_Class_Provider.cpp @@ -288,10 +288,11 @@ class EventQuery } else { + cJSON* idItem = cJSON_GetObjectItem(entry, "id"); // Image event - if (cJSON_GetObjectItem(entry, "id") != NULL) + if (idItem != NULL) { - instance.ElementName_value(cJSON_GetObjectItem(entry, "id")->valuestring); + instance.ElementName_value(idItem->valuestring); } instance.Id_value(""); instance.ContainerName_value(""); diff --git a/source/code/providers/Container_ImageInventory_Class_Provider.cpp b/source/code/providers/Container_ImageInventory_Class_Provider.cpp index 01d1c639c..f5742ef5f 100644 --- a/source/code/providers/Container_ImageInventory_Class_Provider.cpp +++ b/source/code/providers/Container_ImageInventory_Class_Provider.cpp @@ -173,10 +173,12 @@ class InventoryQuery { string id = string(objItem->valuestring); - if (cJSON_GetObjectItem(state, "Running") != NULL && cJSON_GetObjectItem(state, "Running")->valueint) + cJSON* runningItem = cJSON_GetObjectItem(state, "Running"); + if (runningItem != NULL && runningItem->valueint) { // Running container - if (cJSON_GetObjectItem(state, "Paused") != NULL && cJSON_GetObjectItem(state, "Paused")->valueint) + cJSON* pausedItem = cJSON_GetObjectItem(state, "Paused"); + if (pausedItem != NULL && pausedItem->valueint) { // Paused 
container instances[idTable[id]].Paused_value(instances[idTable[id]].Paused_value() + 1); @@ -188,7 +190,8 @@ class InventoryQuery } else { - if (cJSON_GetObjectItem(state, "ExitCode") != NULL && cJSON_GetObjectItem(state, "ExitCode")->valueint) + cJSON* exitCodeItem = cJSON_GetObjectItem(state, "ExitCode"); + if (exitCodeItem != NULL && exitCodeItem->valueint) { // Container exited nonzero instances[idTable[id]].Failed_value(instances[idTable[id]].Failed_value() + 1); @@ -206,9 +209,10 @@ class InventoryQuery } else { - if (cJSON_GetObjectItem(entry, "Id") != NULL) + cJSON* idItem = cJSON_GetObjectItem(entry, "Id"); + if (idItem != NULL) { - syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "Attempt in ObtainContainerState to get container %s state information returned null", idItem->valuestring); } } } @@ -263,7 +267,7 @@ class InventoryQuery } else { - syslog(LOG_WARNING, "API call in AggregateContainerStatus to inspect container %s returned null", cJSON_GetObjectItem(entry, "Id")->valuestring); + syslog(LOG_WARNING, "API call in AggregateContainerStatus to inspect container %s returned null", objItem->valuestring); } } } From 938c2edc0d84917c123c2947c791fa3806fce25c Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 28 Sep 2018 16:00:29 -0700 Subject: [PATCH 14/88] Rashmi/cjson delete null check (#138) * adding null check for cjson-delete * null chk * removing null check --- source/code/providers/Container_Process_Class_Provider.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/providers/Container_Process_Class_Provider.cpp b/source/code/providers/Container_Process_Class_Provider.cpp index 9adc4edcd..e27df1788 100644 --- a/source/code/providers/Container_Process_Class_Provider.cpp +++ b/source/code/providers/Container_Process_Class_Provider.cpp @@ -163,7 +163,10 @@ class 
ContainerProcessQuery } } } - cJSON_Delete(dockerPsResponse[0]); + if (!dockerPsResponse.empty() && dockerPsResponse[0]) + { + cJSON_Delete(dockerPsResponse[0]); + } } catch (std::exception &e) { From fbfdf11e98cebbbc623bd845bf3010b46dd3918b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 2 Oct 2018 17:33:22 -0700 Subject: [PATCH 15/88] updating log level to debug for some provider workflows (#139) --- installer/conf/container.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 9eaed9b47..a41b963a9 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -111,7 +111,7 @@ type out_oms - log_level info + log_level debug buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer @@ -124,7 +124,7 @@ type out_oms - log_level info + log_level debug buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_imageinventory*.buffer @@ -137,7 +137,7 @@ type out_oms - log_level info + log_level debug buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_servicelog*.buffer From d4260663ccaeae093911052ab47bb2f644f3e56c Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 4 Oct 2018 14:01:11 -0700 Subject: [PATCH 16/88] Fixing CPU Utilization and removing Fluent-bit filters (#140) Removing fluent-bit filters, CPU optimizations --- installer/conf/td-agent-bit.conf | 20 ++---------- source/code/go/src/plugins/oms.go | 47 ++++++++++++++++----------- source/code/go/src/plugins/out_oms.go | 2 +- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 84a9fcf94..27916eafd 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -12,23 +12,9 @@ Parser docker Mem_Buf_Limit 30m Path_Key filepath - -[FILTER] - Name record_modifier - Match oms.container.log.* - 
Whitelist_key log - Whitelist_key stream - Whitelist_key time - Whitelist_key filepath - -[FILTER] - Name modify - Match oms.container.log.* - Rename log LogEntry - Rename stream LogEntrySource - Rename time LogEntryTimeStamp - Rename filepath Filepath - Add_if_not_present SourceSystem Containers + Buffer_Chunk_Size 1m + Buffer_Max_Size 1m + Skip_Long_Lines On [OUTPUT] Name oms diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 2e9e2f3d0..c7fe8eb42 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -12,11 +12,11 @@ import ( "strings" "sync" "time" -) -import ( + "github.com/fluent/fluent-bit-go/output" - "github.com/mitchellh/mapstructure" + lumberjack "gopkg.in/natefinch/lumberjack.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -81,7 +81,6 @@ type DataItem struct { Name string `json:"Name"` SourceSystem string `json:"SourceSystem"` Computer string `json:"Computer"` - Filepath string `json:"Filepath"` } // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point @@ -199,23 +198,18 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { for _, record := range tailPluginRecords { - filepath := toString(record["Filepath"]) - containerID := getContainerIDFromFilePath(filepath) + containerID := GetContainerIDFromFilePath(toString(record["filepath"])) if containerID == "" || containsKey(IgnoreIDSet, containerID) { continue } - var dataItem DataItem stringMap := make(map[string]string) - // convert map[interface{}]interface{} to map[string]string - for key, value := range record { - strKey := fmt.Sprintf("%v", key) - strValue := toString(value) - stringMap[strKey] = strValue - } - + stringMap["LogEntry"] = toString(record["log"]) + stringMap["LogEntrySource"] = toString(record["stream"]) + stringMap["LogEntryTimeStamp"] = toString(record["time"]) + 
stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID if val, ok := ImageIDMap[containerID]; ok { @@ -238,8 +232,17 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } } - stringMap["Computer"] = Computer - mapstructure.Decode(stringMap, &dataItem) + dataItem := DataItem{ + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], + } + dataItems = append(dataItems, dataItem) } @@ -281,11 +284,17 @@ func containsKey(currentMap map[string]bool, key string) bool { } func toString(s interface{}) string { - value := s.([]uint8) - return string([]byte(value[:])) + switch t := s.(type) { + case []byte: + // prevent encoding to base64 + return string(t) + default: + return "" + } } -func getContainerIDFromFilePath(filepath string) string { +// GetContainerIDFromFilePath Gets the container ID From the file Path +func GetContainerIDFromFilePath(filepath string) string { start := strings.LastIndex(filepath, "-") end := strings.LastIndex(filepath, ".") if start >= end || start == -1 || end == -1 { diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index ec9a573d1..0efc1242d 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -10,7 +10,7 @@ import ( //export FLBPluginRegister func FLBPluginRegister(ctx unsafe.Pointer) int { - return output.FLBPluginRegister(ctx, "oms", "Stdout GO!") + return output.FLBPluginRegister(ctx, "oms", "OMS GO!") } //export FLBPluginInit From c2cabab7199870af23bb90de10bca4d8eb50e847 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 9 Oct 2018 14:50:10 -0700 Subject: [PATCH 17/88] Minor tweaks 1. Remove some logging 2. Added more Error Handling 3. 
Continue when there is an error with k8s api (#141) * Removing some logs, added more error checking, continue on kube-api error * Return FLB OK for json Marshall error, instead of RETRY --- source/code/go/src/plugins/oms.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c7fe8eb42..d20f11d57 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -133,6 +133,7 @@ func updateContainerImageNameMaps() { pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) if err != nil { Log("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + continue } for _, pod := range pods.Items { @@ -216,20 +217,12 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap["Image"] = val } else { Log("ContainerId %s not present in Map ", containerID) - Log("CurrentMap Snapshot \n") - for k, v := range ImageIDMap { - Log("%s ==> %s", k, v) - } } if val, ok := NameIDMap[containerID]; ok { stringMap["Name"] = val } else { Log("ContainerId %s not present in Map ", containerID) - Log("CurrentMap Snapshot \n") - for k, v := range NameIDMap { - Log("%s ==> %s", k, v) - } } dataItem := DataItem{ @@ -253,6 +246,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataItems: dataItems} marshalled, err := json.Marshal(logEntry) + if err != nil { + Log("Error while Marshalling log Entry: %s", err.Error()) + return output.FLB_OK + } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") From 32567db6965f65154663c0204c1a3e2a599530d0 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 10 Oct 2018 14:09:04 -0700 Subject: [PATCH 18/88] * Change FluentBit flush interval to 30 secs (from 5 secs) * Remove 
ContainerPerf, ContainerServiceLog,ContainerProcess (OMI workflows) for Daemonset --- installer/conf/container.conf | 33 -------------------------------- installer/conf/td-agent-bit.conf | 2 +- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index a41b963a9..1916300cb 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -7,19 +7,6 @@ bind 127.0.0.1 -# Filter container logs - - type filter_docker_log - log_path "/var/opt/microsoft/omsagent/log/filter_docker_log.txt" - - -# Container perf - - type oms_omi - object_name "Container" - interval 30s - - # Container inventory type omi @@ -40,16 +27,6 @@ ] -# Container service log - - type omi - run_interval 60s - tag oms.container.servicelog - items [ - ["root/cimv2","Container_DaemonEvent"] - ] - - # Container host inventory type omi @@ -60,16 +37,6 @@ ] -# Container processes - - type omi - run_interval 60s - tag oms.api.ContainerProcess - items [ - ["root/cimv2","Container_Process"] - ] - - #cadvisor perf type cadvisorperf diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 27916eafd..b5d2309e1 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -1,5 +1,5 @@ [SERVICE] - Flush 5 + Flush 30 Log_Level info Parsers_File /etc/td-agent-bit/parsers.conf Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log From afc981d504c3f44fd3232892e4823d5d09503d14 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Thu, 11 Oct 2018 21:37:09 -0700 Subject: [PATCH 19/88] Container Log Telemetry --- .gitignore | 3 + installer/conf/td-agent-bit.conf | 7 +- source/code/go/src/plugins/glide.lock | 10 +- source/code/go/src/plugins/glide.yaml | 8 +- source/code/go/src/plugins/oms.go | 9 +- source/code/go/src/plugins/out_oms.go | 10 ++ source/code/go/src/plugins/telemetry.go | 151 ++++++++++++++++++++++++ 7 files changed, 188 insertions(+), 10 deletions(-) create mode 
100644 source/code/go/src/plugins/telemetry.go diff --git a/.gitignore b/.gitignore index 92c8c0cf2..e58d69f7b 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ /test/code/providers/TestScriptPath.h /test/code/providers/providertestutils.cpp +source/code/go/src/plugins/profiling +.vscode/launch.json +source/code/go/src/plugins/vendor/ \ No newline at end of file diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b5d2309e1..5a1c105bf 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -17,5 +17,8 @@ Skip_Long_Lines On [OUTPUT] - Name oms - Match oms.container.log.* \ No newline at end of file + Name oms + EnableTelemetry true + TelemetryPushInterval 300 + Match oms.container.log.* + AgentVersion internaltest1004-2 \ No newline at end of file diff --git a/source/code/go/src/plugins/glide.lock b/source/code/go/src/plugins/glide.lock index 4597b594a..fc147fe74 100644 --- a/source/code/go/src/plugins/glide.lock +++ b/source/code/go/src/plugins/glide.lock @@ -1,5 +1,5 @@ -hash: bb32415f402ab29751f29b8e394bc974cbc31861453d817aaeb94ef83dacc488 -updated: 2018-09-14T18:14:28.748047598Z +hash: a6a873d09ed9c3d890a70122e61efba992ead9850fe48f6fcb020d86800d4ade +updated: 2018-10-10T13:37:51.9703908-07:00 imports: - name: github.com/fluent/fluent-bit-go version: c4a158a6e3a793166c6ecfa2d5c80d71eada8959 @@ -38,8 +38,10 @@ imports: - diskcache - name: github.com/json-iterator/go version: f2b4162afba35581b6d4a50d3b8f34e33c144682 -- name: github.com/mitchellh/mapstructure - version: fa473d140ef3c6adf42d6b391fe76707f1f243c8 +- name: github.com/Microsoft/ApplicationInsights-Go + version: d2df5d440eda5372f24fcac03839a64d6cb5f7e5 + subpackages: + - appinsights - name: github.com/modern-go/concurrent version: bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94 - name: github.com/modern-go/reflect2 diff --git a/source/code/go/src/plugins/glide.yaml b/source/code/go/src/plugins/glide.yaml index 403e1efc4..b2829391b 100644 
--- a/source/code/go/src/plugins/glide.yaml +++ b/source/code/go/src/plugins/glide.yaml @@ -1,10 +1,8 @@ -package: plugins +package: . import: - package: github.com/fluent/fluent-bit-go subpackages: - output -- package: github.com/mitchellh/mapstructure - version: ^1.0.0 - package: gopkg.in/natefinch/lumberjack.v2 version: ^2.1.0 - package: k8s.io/apimachinery @@ -15,3 +13,7 @@ import: subpackages: - kubernetes - rest +- package: github.com/Microsoft/ApplicationInsights-Go + version: ^0.4.2 + subpackages: + - appinsights diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d20f11d57..807e00937 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -42,6 +42,8 @@ var ( OMSEndpoint string // Computer (Hostname) when ingesting into ContainerLog table Computer string + // WorkspaceID log analytics workspace id + WorkspaceID string ) var ( @@ -170,6 +172,7 @@ func updateKubeSystemContainerIDs() { pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err != nil { Log("Error getting pods %s\nIt is ok to log here and continue. 
Kube-system logs will be collected", err.Error()) + continue } _ignoreIDSet := make(map[string]bool) @@ -269,7 +272,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { return output.FLB_RETRY } - Log("Successfully flushed %d records in %s", len(dataItems), elapsed) + numRecords := len(dataItems) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + FlushedRecordsCount += float64(numRecords) + FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) } return output.FLB_OK @@ -322,6 +328,7 @@ func InitializePlugin(pluginConfPath string) { log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + WorkspaceID = omsadminConf["WORKSPACE_ID"] Log("OMSEndpoint %s", OMSEndpoint) // Initialize image,name map refresh ticker diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 0efc1242d..37c9eb12b 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -5,6 +5,7 @@ import ( ) import ( "C" + "strings" "unsafe" ) @@ -19,6 +20,14 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") InitializePlugin(ContainerLogPluginConfFilePath) + enablePlugin := output.FLBPluginConfigKey(ctx, "EnableTelemetry") + telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushInterval") + agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") + + if strings.Compare(strings.ToLower(enablePlugin), "true") == 0 { + go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) + SendEvent(EventNameContainerLogInit, make(map[string]string)) + } return output.FLB_OK } @@ -48,6 +57,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { + defer TelemetryShutdown() KubeSystemContainersRefreshTicker.Stop() 
ContainerImageNameRefreshTicker.Stop() return output.FLB_OK diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go new file mode 100644 index 000000000..4d4ab2371 --- /dev/null +++ b/source/code/go/src/plugins/telemetry.go @@ -0,0 +1,151 @@ +package main + +import ( + "encoding/base64" + "errors" + "os" + "strconv" + "strings" + "time" + + "github.com/Microsoft/ApplicationInsights-Go/appinsights" +) + +var ( + // FlushedRecordsCount indicates the number of flushed records in the current period + FlushedRecordsCount float64 + // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period + FlushedRecordsTimeTaken float64 + // CommonProperties indicates the dimensions that are sent with every event/metric + CommonProperties map[string]string + // TelemetryClient is the client used to send the telemetry + TelemetryClient appinsights.TelemetryClient + // ContainerLogTelemetryTicker sends telemetry periodically + ContainerLogTelemetryTicker *time.Ticker +) + +const ( + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + controllerTypeDaemonSet = "DaemonSet" + controllerTypeReplicaSet = "ReplicaSet" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsInstrumentationKey = "APPLICATIONINSIGHTS_INSTRUMENTATIONKEY" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + defaultTelemetryPushInterval = 300 + + // EventNameContainerLogInit name of the event + EventNameContainerLogInit = "ContainerLogPluginInitialized" +) + +// Initialize initializes the telemetry artifacts +func initialize(telemetryIntervalProperty string, agentVersion string) (int, error) { + + telemetryInterval, err := strconv.Atoi(telemetryIntervalProperty) + if err != nil { + telemetryInterval = defaultTelemetryPushInterval + } + + ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryInterval)) + + encodedIkey := 
os.Getenv(envAppInsightsInstrumentationKey) + if encodedIkey == "" { + Log("App Insights IKey missing in Environment Variables \n") + return -1, errors.New("Missing App Insights Instrumentation Key Environment Variable") + } + + decIkey, err := base64.StdEncoding.DecodeString(encodedIkey) + if err != nil { + Log("Error Decoding encoded Instrumentation key %s", err.Error()) + return -1, err + } + + TelemetryClient = appinsights.NewTelemetryClient(string(decIkey)) + + CommonProperties = make(map[string]string) + CommonProperties["Computer"] = Computer + CommonProperties["WorkspaceID"] = WorkspaceID + CommonProperties["ControllerType"] = controllerTypeDaemonSet + CommonProperties["AgentVersion"] = agentVersion + + aksResourceID := os.Getenv(envAKSResourceID) + // if the aks resource id is not defined, it is most likely an ACS Cluster + if aksResourceID == "" { + CommonProperties["ACSResourceName"] = os.Getenv(envACSResourceName) + CommonProperties["ClusterType"] = clusterTypeACS + + CommonProperties["SubscriptionID"] = "" + CommonProperties["ResourceGroupName"] = "" + CommonProperties["ClusterName"] = "" + CommonProperties["Region"] = "" + + } else { + CommonProperties["ACSResourceName"] = "" + splitStrings := strings.Split(aksResourceID, "/") + CommonProperties["SubscriptionID"] = splitStrings[2] + CommonProperties["ResourceGroupName"] = splitStrings[4] + CommonProperties["ClusterName"] = splitStrings[8] + CommonProperties["ClusterType"] = clusterTypeAKS + + region := os.Getenv("AKS_REGION") + if region != "" { + CommonProperties["Region"] = region + } + } + + TelemetryClient.Context().CommonProperties = CommonProperties + return 0, nil +} + +// SendContainerLogFlushRateMetric is a go-routine that flushes the data periodically (every 5 mins to App Insights) +func SendContainerLogFlushRateMetric(telemetryIntervalProperty string, agentVersion string) { + + ret, err := initialize(telemetryIntervalProperty, agentVersion) + if ret != 0 || err != nil { + Log("Error During 
Telemetry Initialization :%s", err.Error()) + return + } + + for ; true; <-ContainerLogTelemetryTicker.C { + flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 + metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) + TelemetryClient.Track(metric) + FlushedRecordsCount = 0.0 + FlushedRecordsTimeTaken = 0.0 + } +} + +// TelemetryShutdown stops the ticker that sends data to App Insights periodically +func TelemetryShutdown() { + Log("Shutting down ContainerLog Telemetry\n") + ContainerLogTelemetryTicker.Stop() +} + +// SendEvent sends an event to App Insights +func SendEvent(eventName string, dimensions map[string]string) { + // this is because the TelemetryClient is initialized in a different goroutine. A simple wait loop here is just waiting for it to be initialized. This will happen only for the init event. Any subsequent Event should work just fine + for TelemetryClient == nil { + Log("Waiting for Telemetry Client to be initialized") + time.Sleep(1 * time.Second) + } + + // take a copy so the CommonProperties can be restored later + _commonProps := make(map[string]string) + for k, v := range TelemetryClient.Context().CommonProperties { + _commonProps[k] = v + } + + // add any extra dimensions + for k, v := range dimensions { + TelemetryClient.Context().CommonProperties[k] = v + } + + Log("Sending Event : %s\n", eventName) + event := appinsights.NewEventTelemetry(eventName) + TelemetryClient.Track(event) + + // restore original CommonProperties + TelemetryClient.Context().CommonProperties = _commonProps +} From 4b958dde94450e96d6d46351756c83500df7935f Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Oct 2018 09:18:10 -0700 Subject: [PATCH 20/88] Fixing an issue with Send Init Event if Telemetry is not initialized properly, tab to whitespace in conf file --- installer/conf/td-agent-bit.conf | 2 +- 
source/code/go/src/plugins/out_oms.go | 7 ++-- source/code/go/src/plugins/telemetry.go | 44 ++++++++++++++----------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 5a1c105bf..6849a3744 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -19,6 +19,6 @@ [OUTPUT] Name oms EnableTelemetry true - TelemetryPushInterval 300 + TelemetryPushInterval 300 Match oms.container.log.* AgentVersion internaltest1004-2 \ No newline at end of file diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 37c9eb12b..2603368ab 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -20,13 +20,14 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") InitializePlugin(ContainerLogPluginConfFilePath) - enablePlugin := output.FLBPluginConfigKey(ctx, "EnableTelemetry") + enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushInterval") agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - if strings.Compare(strings.ToLower(enablePlugin), "true") == 0 { + if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) - SendEvent(EventNameContainerLogInit, make(map[string]string)) + } else { + Log("Telemetry is not enabled for the plugin %s \n", output.FLBPluginConfigKey(ctx, "Name")) } return output.FLB_OK } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4d4ab2371..c2f565a45 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -4,6 +4,7 @@ import ( "encoding/base64" "errors" "os" + "runtime" "strconv" "strings" "time" @@ 
-25,39 +26,40 @@ var ( ) const ( - clusterTypeACS = "ACS" - clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - controllerTypeReplicaSet = "ReplicaSet" - envAKSResourceID = "AKS_RESOURCE_ID" - envACSResourceName = "ACS_RESOURCE_NAME" - envAppInsightsInstrumentationKey = "APPLICATIONINSIGHTS_INSTRUMENTATIONKEY" - metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" - defaultTelemetryPushInterval = 300 + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + controllerTypeDaemonSet = "DaemonSet" + controllerTypeReplicaSet = "ReplicaSet" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + defaultTelemetryPushInterval = 300 // EventNameContainerLogInit name of the event EventNameContainerLogInit = "ContainerLogPluginInitialized" ) // Initialize initializes the telemetry artifacts -func initialize(telemetryIntervalProperty string, agentVersion string) (int, error) { +func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, error) { - telemetryInterval, err := strconv.Atoi(telemetryIntervalProperty) + telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) if err != nil { - telemetryInterval = defaultTelemetryPushInterval + Log("Error Converting telemetryPushIntervalProperty %s. Using Default Interval... 
%d \n", telemetryPushIntervalProperty, defaultTelemetryPushInterval) + telemetryPushInterval = defaultTelemetryPushInterval } - ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryInterval)) + ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryPushInterval)) - encodedIkey := os.Getenv(envAppInsightsInstrumentationKey) + encodedIkey := os.Getenv(envAppInsightsAuth) if encodedIkey == "" { - Log("App Insights IKey missing in Environment Variables \n") - return -1, errors.New("Missing App Insights Instrumentation Key Environment Variable") + Log("Environment Variable Missing \n") + return -1, errors.New("Missing Environment Variable") } decIkey, err := base64.StdEncoding.DecodeString(encodedIkey) if err != nil { - Log("Error Decoding encoded Instrumentation key %s", err.Error()) + Log("Decoding Error %s", err.Error()) return -1, err } @@ -99,14 +101,16 @@ func initialize(telemetryIntervalProperty string, agentVersion string) (int, err } // SendContainerLogFlushRateMetric is a go-routine that flushes the data periodically (every 5 mins to App Insights) -func SendContainerLogFlushRateMetric(telemetryIntervalProperty string, agentVersion string) { +func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agentVersion string) { - ret, err := initialize(telemetryIntervalProperty, agentVersion) + ret, err := initialize(telemetryPushIntervalProperty, agentVersion) if ret != 0 || err != nil { Log("Error During Telemetry Initialization :%s", err.Error()) - return + runtime.Goexit() } + SendEvent(EventNameContainerLogInit, make(map[string]string)) + for ; true; <-ContainerLogTelemetryTicker.C { flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) From 510ef9f95b8e5de04e7b5952e24458374d6cbf6b Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Oct 2018 10:45:14 -0700 Subject: [PATCH 21/88] PR feedback --- 
installer/conf/td-agent-bit.conf | 10 ++++----- source/code/go/src/plugins/out_oms.go | 8 +++---- source/code/go/src/plugins/telemetry.go | 30 +++++++++++-------------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 6849a3744..b01b3a352 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -17,8 +17,8 @@ Skip_Long_Lines On [OUTPUT] - Name oms - EnableTelemetry true - TelemetryPushInterval 300 - Match oms.container.log.* - AgentVersion internaltest1004-2 \ No newline at end of file + Name oms + EnableTelemetry true + TelemetryPushIntervalSeconds 300 + Match oms.container.log.* + AgentVersion internaltest1004-2 \ No newline at end of file diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 2603368ab..732ae5216 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -21,13 +21,13 @@ func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") InitializePlugin(ContainerLogPluginConfFilePath) enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") - telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushInterval") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { + telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") + agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) } else { Log("Telemetry is not enabled for the plugin %s \n", output.FLBPluginConfigKey(ctx, "Name")) + return output.FLB_OK } return output.FLB_OK } @@ -58,7 +58,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { // FLBPluginExit exits the plugin func FLBPluginExit() int { - defer TelemetryShutdown() + 
ContainerLogTelemetryTicker.Stop() KubeSystemContainersRefreshTicker.Stop() ContainerImageNameRefreshTicker.Stop() return output.FLB_OK diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index c2f565a45..4396ea655 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -26,15 +26,15 @@ var ( ) const ( - clusterTypeACS = "ACS" - clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - controllerTypeReplicaSet = "ReplicaSet" - envAKSResourceID = "AKS_RESOURCE_ID" - envACSResourceName = "ACS_RESOURCE_NAME" - envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" - metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" - defaultTelemetryPushInterval = 300 + clusterTypeACS = "ACS" + clusterTypeAKS = "AKS" + controllerTypeDaemonSet = "DaemonSet" + controllerTypeReplicaSet = "ReplicaSet" + envAKSResourceID = "AKS_RESOURCE_ID" + envACSResourceName = "ACS_RESOURCE_NAME" + envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" + metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" + defaultTelemetryPushIntervalSeconds = 300 // EventNameContainerLogInit name of the event EventNameContainerLogInit = "ContainerLogPluginInitialized" @@ -45,8 +45,8 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) if err != nil { - Log("Error Converting telemetryPushIntervalProperty %s. Using Default Interval... %d \n", telemetryPushIntervalProperty, defaultTelemetryPushInterval) - telemetryPushInterval = defaultTelemetryPushInterval + Log("Error Converting telemetryPushIntervalProperty %s. Using Default Interval... 
%d \n", telemetryPushIntervalProperty, defaultTelemetryPushIntervalSeconds) + telemetryPushInterval = defaultTelemetryPushIntervalSeconds } ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryPushInterval)) @@ -116,17 +116,13 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) TelemetryClient.Track(metric) + DataUpdateMutex.Lock() FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 + DataUpdateMutex.Unlock() } } -// TelemetryShutdown stops the ticker that sends data to App Insights periodically -func TelemetryShutdown() { - Log("Shutting down ContainerLog Telemetry\n") - ContainerLogTelemetryTicker.Stop() -} - // SendEvent sends an event to App Insights func SendEvent(eventName string, dimensions map[string]string) { // this is because the TelemetryClient is initialized in a different goroutine. A simple wait loop here is just waiting for it to be initialized. This will happen only for the init event. 
Any subsequent Event should work just fine From 684c39b63581fab69595885ec2c98942098be4f6 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Fri, 12 Oct 2018 15:44:25 -0700 Subject: [PATCH 22/88] PR feedback --- source/code/go/src/plugins/telemetry.go | 42 +++++++++---------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4396ea655..621d88eec 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -81,19 +81,21 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, CommonProperties["ResourceGroupName"] = "" CommonProperties["ClusterName"] = "" CommonProperties["Region"] = "" + CommonProperties["AKS_RESOURCE_ID"] = "" } else { CommonProperties["ACSResourceName"] = "" + CommonProperties["AKS_RESOURCE_ID"] = aksResourceID splitStrings := strings.Split(aksResourceID, "/") - CommonProperties["SubscriptionID"] = splitStrings[2] - CommonProperties["ResourceGroupName"] = splitStrings[4] - CommonProperties["ClusterName"] = splitStrings[8] + if len(aksResourceID) > 0 && len(aksResourceID) < 10 { + CommonProperties["SubscriptionID"] = splitStrings[2] + CommonProperties["ResourceGroupName"] = splitStrings[4] + CommonProperties["ClusterName"] = splitStrings[8] + } CommonProperties["ClusterType"] = clusterTypeAKS region := os.Getenv("AKS_REGION") - if region != "" { - CommonProperties["Region"] = region - } + CommonProperties["Region"] = region } TelemetryClient.Context().CommonProperties = CommonProperties @@ -112,40 +114,26 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent SendEvent(EventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { + DataUpdateMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 - metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) Log("Flushed Records : 
%f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) - TelemetryClient.Track(metric) - DataUpdateMutex.Lock() FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 DataUpdateMutex.Unlock() + metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(metric) } } // SendEvent sends an event to App Insights func SendEvent(eventName string, dimensions map[string]string) { - // this is because the TelemetryClient is initialized in a different goroutine. A simple wait loop here is just waiting for it to be initialized. This will happen only for the init event. Any subsequent Event should work just fine - for TelemetryClient == nil { - Log("Waiting for Telemetry Client to be initialized") - time.Sleep(1 * time.Second) - } - - // take a copy so the CommonProperties can be restored later - _commonProps := make(map[string]string) - for k, v := range TelemetryClient.Context().CommonProperties { - _commonProps[k] = v - } + Log("Sending Event : %s\n", eventName) + event := appinsights.NewEventTelemetry(eventName) - // add any extra dimensions + // add any extra Properties for k, v := range dimensions { - TelemetryClient.Context().CommonProperties[k] = v + event.Properties[k] = v } - Log("Sending Event : %s\n", eventName) - event := appinsights.NewEventTelemetry(eventName) TelemetryClient.Track(event) - - // restore original CommonProperties - TelemetryClient.Context().CommonProperties = _commonProps } From e165275bb8c346051cf851fb36dbb91ad7cf8afc Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 15 Oct 2018 15:14:41 -0700 Subject: [PATCH 23/88] Sending an event every 5 mins(Heartbeat) (#146) --- installer/conf/td-agent-bit.conf | 2 -- source/code/go/src/plugins/telemetry.go | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b01b3a352..2553f405f 100644 --- a/installer/conf/td-agent-bit.conf 
+++ b/installer/conf/td-agent-bit.conf @@ -12,8 +12,6 @@ Parser docker Mem_Buf_Limit 30m Path_Key filepath - Buffer_Chunk_Size 1m - Buffer_Max_Size 1m Skip_Long_Lines On [OUTPUT] diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 621d88eec..b1bc4439b 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -36,8 +36,8 @@ const ( metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" defaultTelemetryPushIntervalSeconds = 300 - // EventNameContainerLogInit name of the event - EventNameContainerLogInit = "ContainerLogPluginInitialized" + eventNameContainerLogInit = "ContainerLogPluginInitialized" + eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) // Initialize initializes the telemetry artifacts @@ -111,9 +111,10 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent runtime.Goexit() } - SendEvent(EventNameContainerLogInit, make(map[string]string)) + SendEvent(eventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { + SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) DataUpdateMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) From cfe1ca94c259c533a938834a54f1279e703d7e4b Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Oct 2018 13:03:30 -0700 Subject: [PATCH 24/88] PR feedback to cleanup removed workflows --- installer/conf/container.conf | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 1916300cb..17317871c 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -46,7 +46,7 @@ # Filter for correct format to endpoint - + type filter_container @@ -63,19 +63,6 @@ max_retry_wait 
9m - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containerprocess*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug @@ -102,19 +89,6 @@ max_retry_wait 9m - - type out_oms - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_servicelog*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug From 892b51c6b166cf10424bf5b6768633f44aa4cfa7 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Oct 2018 13:04:55 -0700 Subject: [PATCH 25/88] updating agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 2553f405f..667f2edc2 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -19,4 +19,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion internaltest1004-2 \ No newline at end of file + AgentVersion ciprod10162018 From 9c83160dfa92a4f9ae1ab2b010678148aab4fc4d Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Oct 2018 19:33:43 -0700 Subject: [PATCH 26/88] updating agent version --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 667f2edc2..b39587a97 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -19,4 +19,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod10162018 + AgentVersion ciprod10162018-2 From f0b5a61ea7597d8044f0ef3347f3258996c97c39 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 25 Oct 2018 11:17:39 -0700 
Subject: [PATCH 27/88] Telemetry Updates (#149) * Telemetry Fixes 1. Added Log Generation Rate 2. Fixed parsing bugs 3. Added code to send Exceptions/errors * PR Feedback --- source/code/go/src/plugins/oms.go | 78 +++++++++++++++++++------ source/code/go/src/plugins/out_oms.go | 3 - source/code/go/src/plugins/telemetry.go | 29 ++++++--- source/code/go/src/plugins/utils.go | 8 ++- 4 files changed, 88 insertions(+), 30 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 807e00937..665c3f9f2 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -55,15 +55,18 @@ var ( IgnoreIDSet map[string]bool // DataUpdateMutex read and write mutex access to the container id set DataUpdateMutex = &sync.Mutex{} + // ContainerLogTelemetryMutex read and write mutex access to the Container Log Telemetry + ContainerLogTelemetryMutex = &sync.Mutex{} + // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset ) var ( // KubeSystemContainersRefreshTicker updates the kube-system containers - KubeSystemContainersRefreshTicker = time.NewTicker(time.Second * 300) + KubeSystemContainersRefreshTicker *time.Ticker // ContainerImageNameRefreshTicker updates the container image and names periodically - ContainerImageNameRefreshTicker = time.NewTicker(time.Second * 60) + ContainerImageNameRefreshTicker *time.Ticker ) var ( @@ -99,6 +102,7 @@ func createLogger() *log.Logger { fmt.Printf("File Exists. Opening file in append mode...\n") logfile, err = os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0600) if err != nil { + SendException(err.Error()) fmt.Printf(err.Error()) } } @@ -107,6 +111,7 @@ func createLogger() *log.Logger { fmt.Printf("File Doesnt Exist. 
Creating file...\n") logfile, err = os.Create(path) if err != nil { + SendException(err.Error()) fmt.Printf(err.Error()) } } @@ -134,7 +139,9 @@ func updateContainerImageNameMaps() { pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) if err != nil { - Log("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + Log(message) + SendException(message) continue } @@ -171,7 +178,9 @@ func updateKubeSystemContainerIDs() { pods, err := ClientSet.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err != nil { - Log("Error getting pods %s\nIt is ok to log here and continue. Kube-system logs will be collected", err.Error()) + message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue. 
Kube-system logs will be collected", err.Error()) + SendException(message) + Log(message) continue } @@ -194,17 +203,29 @@ func updateKubeSystemContainerIDs() { // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { - defer DataUpdateMutex.Unlock() - start := time.Now() var dataItems []DataItem + ignoreIDSet := make(map[string]bool) + imageIDMap := make(map[string]string) + nameIDMap := make(map[string]string) + DataUpdateMutex.Lock() + for k, v := range IgnoreIDSet { + ignoreIDSet[k] = v + } + for k, v := range ImageIDMap { + imageIDMap[k] = v + } + for k, v := range NameIDMap { + nameIDMap[k] = v + } + DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { containerID := GetContainerIDFromFilePath(toString(record["filepath"])) - if containerID == "" || containsKey(IgnoreIDSet, containerID) { + if containerID == "" || containsKey(ignoreIDSet, containerID) { continue } @@ -216,13 +237,13 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID - if val, ok := ImageIDMap[containerID]; ok { + if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val } else { Log("ContainerId %s not present in Map ", containerID) } - if val, ok := NameIDMap[containerID]; ok { + if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val } else { Log("ContainerId %s not present in Map ", containerID) @@ -250,7 +271,9 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { marshalled, err := json.Marshal(logEntry) if err != nil { - Log("Error while Marshalling log Entry: %s", err.Error()) + message := fmt.Sprintf("Error while Marshalling log Entry: %s", err.Error()) + Log(message) + SendException(message) return output.FLB_OK } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) @@ -260,8 +283,11 @@ func PostDataHelper(tailPluginRecords 
[]map[interface{}]interface{}) int { elapsed := time.Since(start) if err != nil { - Log("Error when sending request %s \n", err.Error()) + message := fmt.Sprintf("Error when sending request %s \n", err.Error()) + Log(message) + SendException(message) Log("Failed to flush %d records after %s", len(dataItems), elapsed) + return output.FLB_RETRY } @@ -274,8 +300,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { numRecords := len(dataItems) Log("Successfully flushed %d records in %s", numRecords, elapsed) + ContainerLogTelemetryMutex.Lock() FlushedRecordsCount += float64(numRecords) FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) + ContainerLogTelemetryMutex.Unlock() } return output.FLB_OK @@ -318,13 +346,17 @@ func InitializePlugin(pluginConfPath string) { pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { - Log("Error Reading plugin config path : %s \n", err.Error()) - log.Fatalf("Error Reading plugin config path : %s \n", err.Error()) + message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) + Log(message) + SendException(message) + time.Sleep(30 * time.Second) + log.Fatalln(message) } omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) if err != nil { Log(err.Error()) + SendException(err.Error()) log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] @@ -334,7 +366,9 @@ func InitializePlugin(pluginConfPath string) { // Initialize image,name map refresh ticker containerInventoryRefreshInterval, err := strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { - Log("Error Reading Container Inventory Refresh Interval %s", err.Error()) + message := fmt.Sprintf("Error Reading Container Inventory Refresh Interval %s", err.Error()) + Log(message) + SendException(message) Log("Using Default Refresh Interval of %d s\n", defaultContainerInventoryRefreshInterval) 
containerInventoryRefreshInterval = defaultContainerInventoryRefreshInterval } @@ -344,7 +378,9 @@ func InitializePlugin(pluginConfPath string) { // Initialize Kube System Refresh Ticker kubeSystemContainersRefreshInterval, err := strconv.Atoi(pluginConfig["kube_system_containers_refresh_interval"]) if err != nil { - Log("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + message := fmt.Sprintf("Error Reading Kube System Container Ids Refresh Interval %s", err.Error()) + Log(message) + SendException(message) Log("Using Default Refresh Interval of %d s\n", defaultKubeSystemContainersRefreshInterval) kubeSystemContainersRefreshInterval = defaultKubeSystemContainersRefreshInterval } @@ -356,7 +392,9 @@ func InitializePlugin(pluginConfPath string) { if err != nil { // It is ok to log here and continue, because only the Computer column will be missing, // which can be deduced from a combination of containerId, and docker logs on the node - Log("Error when reading containerHostName file %s.\n It is ok to log here and continue, because only the Computer column will be missing, which can be deduced from a combination of containerId, and docker logs on the nodes\n", err.Error()) + message := fmt.Sprintf("Error when reading containerHostName file %s.\n It is ok to log here and continue, because only the Computer column will be missing, which can be deduced from a combination of containerId, and docker logs on the nodes\n", err.Error()) + Log(message) + SendException(message) } Computer = strings.TrimSuffix(toString(containerHostName), "\n") Log("Computer == %s \n", Computer) @@ -364,12 +402,16 @@ func InitializePlugin(pluginConfPath string) { // Initialize KubeAPI Client config, err := rest.InClusterConfig() if err != nil { - Log("Error getting config %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + message := fmt.Sprintf("Error getting config 
%s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + Log(message) + SendException(message) } ClientSet, err = kubernetes.NewForConfig(config) if err != nil { - Log("Error getting clientset %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + message := fmt.Sprintf("Error getting clientset %s.\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) + SendException(message) + Log(message) } PluginConfiguration = pluginConfig diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 732ae5216..e2ee324e7 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -34,7 +34,6 @@ func FLBPluginInit(ctx unsafe.Pointer) int { //export FLBPluginFlush func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { - var count int var ret int var record map[interface{}]interface{} var records []map[interface{}]interface{} @@ -43,7 +42,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { dec := output.NewDecoder(data, int(length)) // Iterate Records - count = 0 for { // Extract Record ret, _, record = output.GetRecord(dec) @@ -51,7 +49,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { break } records = append(records, record) - count++ } return PostDataHelper(records) } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index b1bc4439b..72454948d 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -34,13 +34,14 @@ const ( envACSResourceName = "ACS_RESOURCE_NAME" envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" metricNameAvgFlushRate = 
"ContainerLogAvgRecordsFlushedPerSec" + metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" defaultTelemetryPushIntervalSeconds = 300 eventNameContainerLogInit = "ContainerLogPluginInitialized" eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) -// Initialize initializes the telemetry artifacts +// initialize initializes the telemetry artifacts func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, error) { telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) @@ -87,7 +88,7 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, CommonProperties["ACSResourceName"] = "" CommonProperties["AKS_RESOURCE_ID"] = aksResourceID splitStrings := strings.Split(aksResourceID, "/") - if len(aksResourceID) > 0 && len(aksResourceID) < 10 { + if len(splitStrings) > 0 && len(splitStrings) < 10 { CommonProperties["SubscriptionID"] = splitStrings[2] CommonProperties["ResourceGroupName"] = splitStrings[4] CommonProperties["ClusterName"] = splitStrings[8] @@ -110,19 +111,24 @@ func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agent Log("Error During Telemetry Initialization :%s", err.Error()) runtime.Goexit() } - + start := time.Now() SendEvent(eventNameContainerLogInit, make(map[string]string)) for ; true; <-ContainerLogTelemetryTicker.C { SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) - DataUpdateMutex.Lock() + elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 - Log("Flushed Records : %f Time Taken : %f flush Rate : %f", FlushedRecordsCount, FlushedRecordsTimeTaken, flushRate) + logRate := FlushedRecordsCount / float64(elapsed/time.Second) FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 - DataUpdateMutex.Unlock() - metric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) - TelemetryClient.Track(metric) + 
ContainerLogTelemetryMutex.Unlock() + + flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(flushRateMetric) + logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) + TelemetryClient.Track(logRateMetric) + start = time.Now() } } @@ -138,3 +144,10 @@ func SendEvent(eventName string, dimensions map[string]string) { TelemetryClient.Track(event) } + +// SendException send an event to the configured app insights instance +func SendException(err interface{}) { + if TelemetryClient != nil { + TelemetryClient.TrackException(err) + } +} diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 1ac9b05a9..94db033bd 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -3,6 +3,7 @@ package main import ( "bufio" "crypto/tls" + "fmt" "log" "net/http" "os" @@ -19,7 +20,9 @@ func ReadConfiguration(filename string) (map[string]string, error) { file, err := os.Open(filename) if err != nil { + SendException(err) log.Fatal(err) + return nil, err } defer file.Close() @@ -39,6 +42,7 @@ func ReadConfiguration(filename string) (map[string]string, error) { } if err := scanner.Err(); err != nil { + SendException(err) log.Fatal(err) return nil, err } @@ -51,7 +55,9 @@ func CreateHTTPClient() { cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) if err != nil { - Log("Error when loading cert %s", err.Error()) + message := fmt.Sprintf("Error when loading cert %s", err.Error()) + SendException(message) + Log(message) log.Fatalf("Error when loading cert %s", err.Error()) } From a58998ec5a03b3a4bd502a9fb7be5e0bdfd3eee2 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 30 Oct 2018 09:52:36 -0700 Subject: [PATCH 28/88] Changes to send omsagent/omsagent-rs kubectl logs to App Insights (#159) * Changes to send omsagent/omsagent-rs kubectl logs to App Insights * PR 
Feedback --- installer/conf/td-agent-bit.conf | 9 +++ source/code/go/src/plugins/oms.go | 37 +++++---- source/code/go/src/plugins/out_oms.go | 12 ++- source/code/go/src/plugins/telemetry.go | 102 +++++++++++++----------- source/code/go/src/plugins/utils.go | 21 ++++- 5 files changed, 107 insertions(+), 74 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b39587a97..2a6199987 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -14,6 +14,15 @@ Path_Key filepath Skip_Long_Lines On +[INPUT] + Name tail + Tag oms.container.log.flbplugin.* + Path /var/log/containers/omsagent*.log + DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db + Mem_Buf_Limit 30m + Path_Key filepath + Skip_Long_Lines On + [OUTPUT] Name oms EnableTelemetry true diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 665c3f9f2..e0abaea1f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -223,7 +223,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { for _, record := range tailPluginRecords { - containerID := GetContainerIDFromFilePath(toString(record["filepath"])) + containerID := GetContainerIDFromFilePath(ToString(record["filepath"])) if containerID == "" || containsKey(ignoreIDSet, containerID) { continue @@ -231,9 +231,9 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { stringMap := make(map[string]string) - stringMap["LogEntry"] = toString(record["log"]) - stringMap["LogEntrySource"] = toString(record["stream"]) - stringMap["LogEntryTimeStamp"] = toString(record["time"]) + stringMap["LogEntry"] = ToString(record["log"]) + stringMap["LogEntrySource"] = ToString(record["stream"]) + stringMap["LogEntryTimeStamp"] = ToString(record["time"]) stringMap["SourceSystem"] = "Containers" stringMap["Id"] = containerID @@ -314,16 +314,6 @@ func containsKey(currentMap map[string]bool, key 
string) bool { return c } -func toString(s interface{}) string { - switch t := s.(type) { - case []byte: - // prevent encoding to base64 - return string(t) - default: - return "" - } -} - // GetContainerIDFromFilePath Gets the container ID From the file Path func GetContainerIDFromFilePath(filepath string) string { start := strings.LastIndex(filepath, "-") @@ -338,12 +328,19 @@ func GetContainerIDFromFilePath(filepath string) string { } // InitializePlugin reads and populates plugin configuration -func InitializePlugin(pluginConfPath string) { +func InitializePlugin(pluginConfPath string, agentVersion string) { IgnoreIDSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) + ret, err := InitializeTelemetryClient(agentVersion) + if ret != 0 || err != nil { + message := fmt.Sprintf("Error During Telemetry Initialization :%s", err.Error()) + fmt.Printf(message) + Log(message) + } + pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) @@ -355,9 +352,11 @@ func InitializePlugin(pluginConfPath string) { omsadminConf, err := ReadConfiguration(pluginConfig["omsadmin_conf_path"]) if err != nil { - Log(err.Error()) - SendException(err.Error()) - log.Fatalf("Error Reading omsadmin configuration %s\n", err.Error()) + message := fmt.Sprintf("Error Reading omsadmin configuration %s\n", err.Error()) + Log(message) + SendException(message) + time.Sleep(30 * time.Second) + log.Fatalln(message) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] WorkspaceID = omsadminConf["WORKSPACE_ID"] @@ -396,7 +395,7 @@ func InitializePlugin(pluginConfPath string) { Log(message) SendException(message) } - Computer = strings.TrimSuffix(toString(containerHostName), "\n") + Computer = strings.TrimSuffix(ToString(containerHostName), "\n") Log("Computer == %s \n", Computer) // Initialize KubeAPI Client diff --git a/source/code/go/src/plugins/out_oms.go 
b/source/code/go/src/plugins/out_oms.go index e2ee324e7..133e0f039 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -19,12 +19,12 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - InitializePlugin(ContainerLogPluginConfFilePath) + agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") + InitializePlugin(ContainerLogPluginConfFilePath, agentVersion) enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - go SendContainerLogFlushRateMetric(telemetryPushInterval, agentVersion) + go SendContainerLogPluginMetrics(telemetryPushInterval) } else { Log("Telemetry is not enabled for the plugin %s \n", output.FLBPluginConfigKey(ctx, "Name")) return output.FLB_OK @@ -50,6 +50,12 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { } records = append(records, record) } + + incomingTag := C.GoString(tag) + if strings.Contains(strings.ToLower(incomingTag), "oms.container.log.flbplugin") { + return PushToAppInsightsTraces(records) + } + return PostDataHelper(records) } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 72454948d..d943c8eda 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -4,12 +4,12 @@ import ( "encoding/base64" "errors" "os" - "runtime" "strconv" "strings" "time" "github.com/Microsoft/ApplicationInsights-Go/appinsights" + "github.com/fluent/fluent-bit-go/output" ) var ( @@ -41,8 +41,8 @@ const ( eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) -// initialize initializes 
the telemetry artifacts -func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, error) { +// SendContainerLogPluginMetrics is a go-routine that flushes the data periodically (every 5 mins to App Insights) +func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) if err != nil { @@ -52,6 +52,49 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, ContainerLogTelemetryTicker = time.NewTicker(time.Second * time.Duration(telemetryPushInterval)) + start := time.Now() + SendEvent(eventNameContainerLogInit, make(map[string]string)) + + for ; true; <-ContainerLogTelemetryTicker.C { + SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) + elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() + flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 + logRate := FlushedRecordsCount / float64(elapsed/time.Second) + FlushedRecordsCount = 0.0 + FlushedRecordsTimeTaken = 0.0 + ContainerLogTelemetryMutex.Unlock() + + flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) + TelemetryClient.Track(flushRateMetric) + logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) + TelemetryClient.Track(logRateMetric) + start = time.Now() + } +} + +// SendEvent sends an event to App Insights +func SendEvent(eventName string, dimensions map[string]string) { + Log("Sending Event : %s\n", eventName) + event := appinsights.NewEventTelemetry(eventName) + + // add any extra Properties + for k, v := range dimensions { + event.Properties[k] = v + } + + TelemetryClient.Track(event) +} + +// SendException send an event to the configured app insights instance +func SendException(err interface{}) { + if TelemetryClient != nil { + TelemetryClient.TrackException(err) + } +} + +// InitializeTelemetryClient sets up the telemetry client to send telemetry to the App 
Insights instance +func InitializeTelemetryClient(agentVersion string) (int, error) { encodedIkey := os.Getenv(envAppInsightsAuth) if encodedIkey == "" { Log("Environment Variable Missing \n") @@ -103,51 +146,14 @@ func initialize(telemetryPushIntervalProperty string, agentVersion string) (int, return 0, nil } -// SendContainerLogFlushRateMetric is a go-routine that flushes the data periodically (every 5 mins to App Insights) -func SendContainerLogFlushRateMetric(telemetryPushIntervalProperty string, agentVersion string) { - - ret, err := initialize(telemetryPushIntervalProperty, agentVersion) - if ret != 0 || err != nil { - Log("Error During Telemetry Initialization :%s", err.Error()) - runtime.Goexit() - } - start := time.Now() - SendEvent(eventNameContainerLogInit, make(map[string]string)) - - for ; true; <-ContainerLogTelemetryTicker.C { - SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) - elapsed := time.Since(start) - ContainerLogTelemetryMutex.Lock() - flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 - logRate := FlushedRecordsCount / float64(elapsed/time.Second) - FlushedRecordsCount = 0.0 - FlushedRecordsTimeTaken = 0.0 - ContainerLogTelemetryMutex.Unlock() - - flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) - TelemetryClient.Track(flushRateMetric) - logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) - TelemetryClient.Track(logRateMetric) - start = time.Now() - } -} - -// SendEvent sends an event to App Insights -func SendEvent(eventName string, dimensions map[string]string) { - Log("Sending Event : %s\n", eventName) - event := appinsights.NewEventTelemetry(eventName) - - // add any extra Properties - for k, v := range dimensions { - event.Properties[k] = v +// PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance +func PushToAppInsightsTraces(records []map[interface{}]interface{}) int { + var logLines 
[]string + for _, record := range records { + logLines = append(logLines, ToString(record["log"])) } - TelemetryClient.Track(event) -} - -// SendException send an event to the configured app insights instance -func SendException(err interface{}) { - if TelemetryClient != nil { - TelemetryClient.TrackException(err) - } + traceEntry := strings.Join(logLines, "\n") + TelemetryClient.TrackTrace(traceEntry, 1) + return output.FLB_OK } diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 94db033bd..91e433a0f 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "strings" + "time" ) // ReadConfiguration reads a property file @@ -21,8 +22,8 @@ func ReadConfiguration(filename string) (map[string]string, error) { file, err := os.Open(filename) if err != nil { SendException(err) - log.Fatal(err) - + time.Sleep(30 * time.Second) + fmt.Printf("%s", err.Error()) return nil, err } defer file.Close() @@ -43,7 +44,8 @@ func ReadConfiguration(filename string) (map[string]string, error) { if err := scanner.Err(); err != nil { SendException(err) - log.Fatal(err) + time.Sleep(30 * time.Second) + log.Fatalf("%s", err.Error()) return nil, err } @@ -52,11 +54,11 @@ func ReadConfiguration(filename string) (map[string]string, error) { // CreateHTTPClient used to create the client for sending post requests to OMSEndpoint func CreateHTTPClient() { - cert, err := tls.LoadX509KeyPair(PluginConfiguration["cert_file_path"], PluginConfiguration["key_file_path"]) if err != nil { message := fmt.Sprintf("Error when loading cert %s", err.Error()) SendException(message) + time.Sleep(30 * time.Second) Log(message) log.Fatalf("Error when loading cert %s", err.Error()) } @@ -72,3 +74,14 @@ func CreateHTTPClient() { Log("Successfully created HTTP Client") } + +// ToString converts an interface into a string +func ToString(s interface{}) string { + switch t := s.(type) { + case []byte: + // 
prevent encoding to base64 + return string(t) + default: + return "" + } +} From 4c2da9f831d5aa39edc3c0096ad639f3c01243a1 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 5 Nov 2018 15:46:02 -0800 Subject: [PATCH 29/88] Rashmi/fluentd docker inventory (#160) * first stab * changes * changes * docker util changes * working tested util * input plugin and conf * changes * changes * changes * changes * changes * working containerinventory * fixing omi removal from container.conf * removing comments * file write and read * deleted containers working * changes * changes * socket timeout * deleting test files * adding log * fixing comment * appinsights changes * changes * tel changes * changes * changes * changes * changes * lib changes * changes * changes * fixes * PR comments * changes * updating the ownership * changes * changes * changes to container data * removing comment * changes * adding collection time * bug fix * env string truncation * changes for acs-engine test --- installer/conf/container.conf | 46 +-- installer/datafiles/base_container.data | 61 +++- .../code/plugin/ApplicationInsightsUtility.rb | 142 ++++++++++ source/code/plugin/ContainerInventoryState.rb | 65 +++++ source/code/plugin/DockerApiClient.rb | 162 +++++++++++ source/code/plugin/DockerApiRestHelper.rb | 55 ++++ source/code/plugin/in_containerinventory.rb | 266 ++++++++++++++++++ .../code/plugin/lib/application_insights.rb | 9 + .../channel/asynchronous_queue.rb | 58 ++++ .../channel/asynchronous_sender.rb | 133 +++++++++ .../channel/contracts/application.rb | 13 + .../channel/contracts/availability_data.rb | 34 +++ .../channel/contracts/base.rb | 13 + .../channel/contracts/cloud.rb | 14 + .../channel/contracts/data.rb | 14 + .../channel/contracts/data_point.rb | 25 ++ .../channel/contracts/data_point_type.rb | 7 + .../channel/contracts/dependency_kind.rb | 9 + .../contracts/dependency_source_type.rb | 9 + .../channel/contracts/device.rb | 18 ++ .../channel/contracts/domain.rb | 10 
+ .../channel/contracts/envelope.rb | 32 +++ .../channel/contracts/event_data.rb | 28 ++ .../channel/contracts/exception_data.rb | 35 +++ .../channel/contracts/exception_details.rb | 28 ++ .../channel/contracts/internal.rb | 15 + .../channel/contracts/json_serializable.rb | 59 ++++ .../channel/contracts/location.rb | 13 + .../channel/contracts/message_data.rb | 24 ++ .../channel/contracts/metric_data.rb | 27 ++ .../channel/contracts/operation.rb | 17 ++ .../channel/contracts/page_view_data.rb | 33 +++ .../channel/contracts/page_view_perf_data.rb | 39 +++ .../contracts/remote_dependency_data.rb | 40 +++ .../channel/contracts/reopenings.rb | 27 ++ .../channel/contracts/request_data.rb | 35 +++ .../channel/contracts/session.rb | 14 + .../channel/contracts/severity_level.rb | 13 + .../channel/contracts/stack_frame.rb | 17 ++ .../channel/contracts/user.rb | 15 + .../lib/application_insights/channel/event.rb | 68 +++++ .../channel/queue_base.rb | 73 +++++ .../channel/sender_base.rb | 88 ++++++ .../channel/synchronous_queue.rb | 45 +++ .../channel/synchronous_sender.rb | 17 ++ .../channel/telemetry_channel.rb | 131 +++++++++ .../channel/telemetry_context.rb | 85 ++++++ .../rack/track_request.rb | 154 ++++++++++ .../application_insights/telemetry_client.rb | 232 +++++++++++++++ .../unhandled_exception.rb | 49 ++++ .../lib/application_insights/version.rb | 3 + 51 files changed, 2581 insertions(+), 38 deletions(-) create mode 100644 source/code/plugin/ApplicationInsightsUtility.rb create mode 100644 source/code/plugin/ContainerInventoryState.rb create mode 100644 source/code/plugin/DockerApiClient.rb create mode 100644 source/code/plugin/DockerApiRestHelper.rb create mode 100644 source/code/plugin/in_containerinventory.rb create mode 100644 source/code/plugin/lib/application_insights.rb create mode 100644 source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb create mode 100644 source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb 
create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/application.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/base.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/cloud.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/data_point.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/device.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/domain.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/envelope.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/event_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/internal.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/location.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/message_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb create mode 100644 
source/code/plugin/lib/application_insights/channel/contracts/operation.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/request_data.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/session.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb create mode 100644 source/code/plugin/lib/application_insights/channel/contracts/user.rb create mode 100644 source/code/plugin/lib/application_insights/channel/event.rb create mode 100644 source/code/plugin/lib/application_insights/channel/queue_base.rb create mode 100644 source/code/plugin/lib/application_insights/channel/sender_base.rb create mode 100644 source/code/plugin/lib/application_insights/channel/synchronous_queue.rb create mode 100644 source/code/plugin/lib/application_insights/channel/synchronous_sender.rb create mode 100644 source/code/plugin/lib/application_insights/channel/telemetry_channel.rb create mode 100644 source/code/plugin/lib/application_insights/channel/telemetry_context.rb create mode 100644 source/code/plugin/lib/application_insights/rack/track_request.rb create mode 100644 source/code/plugin/lib/application_insights/telemetry_client.rb create mode 100644 source/code/plugin/lib/application_insights/unhandled_exception.rb create mode 100644 source/code/plugin/lib/application_insights/version.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 
17317871c..798bd8eb6 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -9,22 +9,10 @@ # Container inventory - type omi - run_interval 60s - tag oms.container.containerinventory - items [ - ["root/cimv2","Container_ContainerInventory"] - ] - - -# Image inventory - - type omi - run_interval 60s - tag oms.container.imageinventory - items [ - ["root/cimv2","Container_ImageInventory"] - ] + type containerinventory + tag oms.containerinsights.containerinventory + run_interval 60s + log_level debug # Container host inventory @@ -45,11 +33,6 @@ log_level debug -# Filter for correct format to endpoint - - type filter_container - - type out_oms_api log_level debug @@ -63,33 +46,22 @@ max_retry_wait 9m - + type out_oms log_level debug + num_threads 5 buffer_chunk_limit 20m buffer_type file buffer_path %STATE_DIR_WS%/out_oms_containerinventory*.buffer buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk flush_interval 20s retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - - - type out_oms - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_imageinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s + retry_wait 30s max_retry_wait 9m - + type out_oms log_level debug num_threads 5 diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 85a128b2a..7181929e2 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -37,6 +37,57 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root +/opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/code/plugin/ApplicationInsightsUtility.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/code/plugin/ContainerInventoryState.rb; 644; root; root +/opt/microsoft/omsagent/plugin/DockerApiClient.rb; source/code/plugin/DockerApiClient.rb; 644; root; root +/opt/microsoft/omsagent/plugin/DockerApiRestHelper.rb; source/code/plugin/DockerApiRestHelper.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_containerinventory.rb; source/code/plugin/in_containerinventory.rb; 644; root; root + +/opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/code/plugin/lib/application_insights/unhandled_exception.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/telemetry_client.rb; source/code/plugin/lib/application_insights/telemetry_client.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/queue_base.rb; source/code/plugin/lib/application_insights/channel/queue_base.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_queue.rb; source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_sender.rb; source/code/plugin/lib/application_insights/channel/synchronous_sender.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point_type.rb; source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data_point.rb; source/code/plugin/lib/application_insights/channel/contracts/data_point.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/stack_frame.rb; source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/request_data.rb; source/code/plugin/lib/application_insights/channel/contracts/request_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/session.rb; source/code/plugin/lib/application_insights/channel/contracts/session.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_data.rb; source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_data.rb; source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/location.rb; source/code/plugin/lib/application_insights/channel/contracts/location.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/operation.rb; source/code/plugin/lib/application_insights/channel/contracts/operation.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/data.rb; source/code/plugin/lib/application_insights/channel/contracts/data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/event_data.rb; source/code/plugin/lib/application_insights/channel/contracts/event_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/metric_data.rb; 
source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/device.rb; source/code/plugin/lib/application_insights/channel/contracts/device.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/message_data.rb; source/code/plugin/lib/application_insights/channel/contracts/message_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/user.rb; source/code/plugin/lib/application_insights/channel/contracts/user.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/severity_level.rb; source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/application.rb; source/code/plugin/lib/application_insights/channel/contracts/application.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/cloud.rb; source/code/plugin/lib/application_insights/channel/contracts/cloud.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/envelope.rb; source/code/plugin/lib/application_insights/channel/contracts/envelope.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/json_serializable.rb; source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb; 644; root; root 
+/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/domain.rb; source/code/plugin/lib/application_insights/channel/contracts/domain.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/base.rb; source/code/plugin/lib/application_insights/channel/contracts/base.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/reopenings.rb; source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/internal.rb; source/code/plugin/lib/application_insights/channel/contracts/internal.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/availability_data.rb; source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts/exception_details.rb; source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/synchronous_queue.rb; source/code/plugin/lib/application_insights/channel/synchronous_queue.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/sender_base.rb; source/code/plugin/lib/application_insights/channel/sender_base.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_context.rb; source/code/plugin/lib/application_insights/channel/telemetry_context.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/asynchronous_sender.rb; source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb; 644; root; 
root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/telemetry_channel.rb; source/code/plugin/lib/application_insights/channel/telemetry_channel.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/event.rb; source/code/plugin/lib/application_insights/channel/event.rb; 644; root; root +/opt/microsoft/omsagent/plugin/lib/application_insights.rb; source/code/plugin/lib/application_insights.rb; 644; root; root + /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root @@ -75,12 +126,17 @@ MAINTAINER: 'Microsoft Corporation' /var/opt/microsoft/docker-cimprov; 755; root; root /var/opt/microsoft/docker-cimprov/state; 755; root; root /var/opt/microsoft/docker-cimprov/state/ContainerInventory; 755; root; root -/var/opt/microsoft/docker-cimprov/state/ImageInventory; 755; root; root /var/opt/microsoft/docker-cimprov/log; 755; root; root /opt/td-agent-bit; 755; root; root;sysdir /opt/td-agent-bit/bin; 755; root; root;sysdir +/opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights/channel; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights/channel/contracts; 755; root; root; sysdir +/opt/microsoft/omsagent/plugin/lib/application_insights/rack; 755; root; root; sysdir + %Dependencies %Postinstall_10 @@ -90,6 +146,9 @@ WriteInstallInfo() { } WriteInstallInfo +#Make omsagent owner for ContainerInventory directory. 
This is needed for ruby plugin to have access +chown omsagent:omsagent /var/opt/microsoft/docker-cimprov/state/ContainerInventory + # Get the state file in place with proper permissions touch /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt chmod 644 /var/opt/microsoft/docker-cimprov/state/LastEventQueryTime.txt diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb new file mode 100644 index 000000000..14fc9f2f8 --- /dev/null +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -0,0 +1,142 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class ApplicationInsightsUtility + require_relative 'lib/application_insights' + require_relative 'omslog' + require_relative 'DockerApiClient' + require 'json' + require 'base64' + + @@HeartBeat = 'HeartBeatEvent' + @@Exception = 'ExceptionEvent' + @@AcsClusterType = 'ACS' + @@AksClusterType = 'AKS' + @@DaemonsetControllerType = 'DaemonSet' + @OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' + @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' + @@EnvAksRegion = 'AKS_REGION' + @@EnvAgentVersion = 'AGENT_VERSION' + @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' + @@CustomProperties = {} + @@Tc = nil + + def initialize + end + + class << self + #Set default properties for telemetry event + def initializeUtility() + begin + resourceInfo = ENV['AKS_RESOURCE_ID'] + if resourceInfo.nil? || resourceInfo.empty? 
+ @@CustomProperties["ACSResourceName"] = ENV[@@EnvAcsResourceName] + @@CustomProperties["ClusterType"] = @@AcsClusterType + @@CustomProperties["SubscriptionID"] = "" + @@CustomProperties["ResourceGroupName"] = "" + @@CustomProperties["ClusterName"] = "" + @@CustomProperties["Region"] = "" + else + @@CustomProperties["AKS_RESOURCE_ID"] = resourceInfo + begin + splitStrings = resourceInfo.split('/') + subscriptionId = splitStrings[2] + resourceGroupName = splitStrings[4] + clusterName = splitStrings[8] + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: parsing AKS resourceId: #{resourceInfo}, error: #{errorStr}") + end + @@CustomProperties["ClusterType"] = @@AksClusterType + @@CustomProperties["SubscriptionID"] = subscriptionId + @@CustomProperties["ResourceGroupName"] = resourceGroupName + @@CustomProperties["ClusterName"] = clusterName + @@CustomProperties["Region"] = ENV[@@EnvAksRegion] + end + @@CustomProperties['ControllerType'] = @@DaemonsetControllerType + dockerInfo = DockerApiClient.dockerInfo + @@CustomProperties['DockerVersion'] = dockerInfo['Version'] + @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + @@CustomProperties['WorkspaceID'] = getWorkspaceId + @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] + encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] + if !encodedAppInsightsKey.nil? + decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") + end + end + + def sendHeartBeatEvent(pluginName) + begin + eventName = pluginName + @@HeartBeat + if !(@@Tc.nil?) 
+ @@Tc.track_event eventName , :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Heartbeat Telemetry sent successfully") + end + rescue =>errorStr + $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") + end + end + + def sendCustomEvent(pluginName, properties) + begin + if !(@@Tc.nil?) + @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Container Count Telemetry sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + end + end + + def sendExceptionTelemetry(errorStr) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility + end + if !(@@Tc.nil?) + @@Tc.track_exception errorStr , :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Exception Telemetry sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") + end + end + + #Method to send heartbeat and container inventory count + def sendTelemetry(pluginName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? 
+ initializeUtility + end + @@CustomProperties['Computer'] = properties['Computer'] + sendHeartBeatEvent(pluginName) + sendCustomEvent(pluginName, properties) + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") + end + end + + def getWorkspaceId() + begin + adminConf = {} + confFile = File.open(@OmsAdminFilePath, "r") + confFile.each_line do |line| + splitStrings = line.split('=') + adminConf[splitStrings[0]] = splitStrings[1] + end + workspaceId = adminConf['WORKSPACE_ID'] + return workspaceId + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/ContainerInventoryState.rb b/source/code/plugin/ContainerInventoryState.rb new file mode 100644 index 000000000..7e5ca18e8 --- /dev/null +++ b/source/code/plugin/ContainerInventoryState.rb @@ -0,0 +1,65 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class ContainerInventoryState + require 'json' + require_relative 'omslog' + @@InventoryDirectory = "/var/opt/microsoft/docker-cimprov/state/ContainerInventory/" + + def initialize + end + + class << self + # Write the container information to disk with the data that is obtained from the current plugin execution + def writeContainerState(container) + containerId = container['InstanceID'] + if !containerId.nil? && !containerId.empty? + begin + file = File.open(@@InventoryDirectory + containerId, "w") + if !file.nil? + file.write(container.to_json) + file.close + else + $log.warn("Exception while opening file with id: #{containerId}") + end + rescue => errorStr + $log.warn("Exception in writeContainerState: #{errorStr}") + end + end + end + + # Reads the container state for the deleted container + def readContainerState(containerId) + begin + containerObject = nil + filepath = @@InventoryDirectory + containerId + file = File.open(filepath, "r") + if !file.nil? 
+ fileContents = file.read + containerObject = JSON.parse(fileContents) + file.close + # Delete the file since the state is update to deleted + File.delete(filepath) if File.exist?(filepath) + else + $log.warn("Open file for container with id returned nil: #{containerId}") + end + rescue => errorStr + $log.warn("Exception in readContainerState: #{errorStr}") + end + return containerObject + end + + # Gets the containers that were written to the disk with the previous plugin invocation but do not exist in the current container list + # Doing this because we need to update the container state to deleted. Else this will stay running forever. + def getDeletedContainers(containerIds) + deletedContainers = nil + begin + previousContainerList = Dir.entries(@@InventoryDirectory) - [".", ".."] + deletedContainers = previousContainerList - containerIds + rescue => errorStr + $log.warn("Exception in getDeletedContainers: #{errorStr}") + end + return deletedContainers + end + end +end \ No newline at end of file diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb new file mode 100644 index 000000000..b93411980 --- /dev/null +++ b/source/code/plugin/DockerApiClient.rb @@ -0,0 +1,162 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class DockerApiClient + + require 'socket' + require 'json' + require 'timeout' + require_relative 'omslog' + require_relative 'DockerApiRestHelper' + require_relative 'ApplicationInsightsUtility' + + @@SocketPath = "/var/run/docker.sock" + @@ChunkSize = 4096 + @@TimeoutInSeconds = 5 + @@PluginName = 'ContainerInventory' + def initialize + end + + class << self + # Make docker socket call for requests + def getResponse(request, isMultiJson) + begin + socket = UNIXSocket.new(@@SocketPath) + dockerResponse = "" + isTimeOut = false + socket.write(request) + # iterate through the response until the last chunk is less than the chunk size so that we can read all data in socket. 
+ loop do + begin + responseChunk = "" + timeout(@@TimeoutInSeconds) do + responseChunk = socket.recv(@@ChunkSize) + end + dockerResponse += responseChunk + rescue Timeout::Error + $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}") + isTimeOut = true + end + break if responseChunk.length < @@ChunkSize + end + socket.close + return (isTimeOut)? nil : parseResponse(dockerResponse, isMultiJson) + rescue => errorStr + $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def parseResponse(dockerResponse, isMultiJson) + # Doing this because the response is in the raw format and includes headers. + # Need to do a regex match to extract the json part of the response - Anything between [{}] in response + parsedJsonResponse = nil + begin + jsonResponse = isMultiJson ? dockerResponse[/\[{.+}\]/] : dockerResponse[/{.+}/] + rescue => errorStr + $log.warn("Regex match for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + end + begin + if jsonResponse != nil + parsedJsonResponse = JSON.parse(jsonResponse) + end + rescue => errorStr + $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + return parsedJsonResponse + end + + + def getDockerHostName() + dockerHostName = "" + request = DockerApiRestHelper.restDockerInfo + response = getResponse(request, false) + if (response != nil) + dockerHostName = response['Name'] + end + return dockerHostName + end + + def listContainers() + ids = [] + request = DockerApiRestHelper.restDockerPs + containers = getResponse(request, true) + if !containers.nil? && !containers.empty? 
+ containers.each do |container| + ids.push(container['Id']) + end + end + return ids + end + + # This method splits the tag value into an array - repository, image and tag + def getImageRepositoryImageTag(tagValue) + result = ["", "", ""] + begin + if !tagValue.empty? + # Find delimiters in the string of format repository/image:imagetag + slashLocation = tagValue.index('/') + colonLocation = tagValue.index(':') + if !colonLocation.nil? + if slashLocation.nil? + # image:imagetag + result[1] = tagValue[0..(colonLocation-1)] + else + # repository/image:imagetag + result[0] = tagValue[0..(slashLocation-1)] + result[1] = tagValue[(slashLocation + 1)..(colonLocation - 1)] + end + result[2] = tagValue[(colonLocation + 1)..-1] + end + end + rescue => errorStr + $log.warn("Exception at getImageRepositoryImageTag: #{errorStr} @ #{Time.now.utc.iso8601}") + end + return result + end + + # Image is in the format repository/image:imagetag - This method creates a hash of image id and repository, image and tag + def getImageIdMap() + result = nil + begin + request = DockerApiRestHelper.restDockerImages + images = getResponse(request, true) + if !images.nil? && !images.empty? + result = {} + images.each do |image| + tagValue = "" + tags = image['RepoTags'] + if !tags.nil? && tags.kind_of?(Array) && tags.length > 0 + tagValue = tags[0] + end + idValue = image['Id'] + if !idValue.nil? 
+ result[idValue] = getImageRepositoryImageTag(tagValue) + end + end + end + rescue => errorStr + $log.warn("Exception at getImageIdMap: #{errorStr} @ #{Time.now.utc.iso8601}") + end + return result + end + + def dockerInspectContainer(id) + request = DockerApiRestHelper.restDockerInspect(id) + return getResponse(request, false) + end + + # This method returns docker version and docker api version for telemetry + def dockerInfo() + request = DockerApiRestHelper.restDockerVersion + response = getResponse(request, false) + dockerInfo = {} + if (response != nil) + dockerInfo['Version'] = response['Version'] + dockerInfo['ApiVersion'] = response['ApiVersion'] + end + return dockerInfo + end + end +end \ No newline at end of file diff --git a/source/code/plugin/DockerApiRestHelper.rb b/source/code/plugin/DockerApiRestHelper.rb new file mode 100644 index 000000000..76361b122 --- /dev/null +++ b/source/code/plugin/DockerApiRestHelper.rb @@ -0,0 +1,55 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class DockerApiRestHelper + def initialize + end + + class << self + # Create the REST request to list images + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#list-images + # returns Request in string format + def restDockerImages() + begin + return "GET /images/json?all=0 HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + + # Create the REST request to list containers + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#list-containers + # returns Request in string format + def restDockerPs() + begin + return "GET /containers/json?all=1 HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + + # Create the REST request to inspect a container + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#inspect-a-container + # parameter - ID of the container to be inspected + # returns Request in string format + def restDockerInspect(id) + begin + return "GET /containers/" + id + "/json HTTP/1.1\r\nHost: 
localhost\r\n\r\n"; + end + end + + # Create the REST request to get docker info + # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.21/#get-container-stats-based-on-resource-usage + # returns Request in string format + def restDockerInfo() + begin + return "GET /info HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + + # Create the REST request to get docker info + # https://docs.docker.com/engine/api/v1.21/#21-containers + # returns Request in string format + def restDockerVersion() + begin + return "GET /version HTTP/1.1\r\nHost: localhost\r\n\r\n"; + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb new file mode 100644 index 000000000..43811e1e1 --- /dev/null +++ b/source/code/plugin/in_containerinventory.rb @@ -0,0 +1,266 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + + class Container_Inventory_Input < Input + Plugin.register_input('containerinventory', self) + + @@PluginName = 'ContainerInventory' + @@RunningState = 'Running' + @@FailedState = 'Failed' + @@StoppedState = 'Stopped' + @@PausedState = 'Paused' + + def initialize + super + require 'json' + require_relative 'DockerApiClient' + require_relative 'ContainerInventoryState' + require_relative 'ApplicationInsightsUtility' + require_relative 'omslog' + end + + config_param :run_interval, :time, :default => '1m' + config_param :tag, :string, :default => "oms.containerinsights.containerinventory" + + def configure (conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def obtainContainerConfig(instance, container) + begin + configValue = 
container['Config'] + if !configValue.nil? + instance['ContainerHostname'] = configValue['Hostname'] + + envValue = configValue['Env'] + envValueString = (envValue.nil?) ? "" : envValue.to_s + # Restricting the ENV string value to 200kb since the size of this string can go very high + if envValueString.length > 200000 + envValueStringTruncated = envValueString.slice(0..200000) + lastIndex = envValueStringTruncated.rindex("\", ") + if !lastIndex.nil? + envValueStringTruncated = envValueStringTruncated.slice(0..lastIndex) + "]" + end + instance['EnvironmentVar'] = envValueStringTruncated + else + instance['EnvironmentVar'] = envValueString + end + + cmdValue = configValue['Cmd'] + cmdValueString = (cmdValue.nil?) ? "" : cmdValue.to_s + instance['Command'] = cmdValueString + + instance['ComposeGroup'] = "" + labelsValue = configValue['Labels'] + if !labelsValue.nil? && !labelsValue.empty? + instance['ComposeGroup'] = labelsValue['com.docker.compose.project'] + end + else + $log.warn("Attempt in ObtainContainerConfig to get container: #{container['Id']} config information returned null") + end + rescue => errorStr + $log.warn("Exception in obtainContainerConfig: #{errorStr}") + end + end + + def obtainContainerState(instance, container) + begin + stateValue = container['State'] + if !stateValue.nil? 
+ exitCodeValue = stateValue['ExitCode'] + # Exit codes less than 0 are not supported by the engine + if exitCodeValue < 0 + exitCodeValue = 128 + $log.info("obtainContainerState::Container: #{container['Id']} returned negative exit code") + end + instance['ExitCode'] = exitCodeValue + if exitCodeValue > 0 + instance['State'] = @@FailedState + else + # Set the Container status : Running/Paused/Stopped + runningValue = stateValue['Running'] + if runningValue + pausedValue = stateValue['Paused'] + # Checking for paused within running is true state because docker returns true for both Running and Paused fields when the container is paused + if pausedValue + instance['State'] = @@PausedState + else + instance['State'] = @@RunningState + end + else + instance['State'] = @@StoppedState + end + end + instance['StartedTime'] = stateValue['StartedAt'] + instance['FinishedTime'] = stateValue['FinishedAt'] + else + $log.info("Attempt in ObtainContainerState to get container: #{container['Id']} state information returned null") + end + rescue => errorStr + $log.warn("Exception in obtainContainerState: #{errorStr}") + end + end + + def obtainContainerHostConfig(instance, container) + begin + hostConfig = container['HostConfig'] + if !hostConfig.nil? + links = hostConfig['Links'] + instance['Links'] = "" + if !links.nil? + linksString = links.to_s + instance['Links'] = (linksString == "null")? "" : linksString + end + portBindings = hostConfig['PortBindings'] + instance['Ports'] = "" + if !portBindings.nil? + portBindingsString = portBindings.to_s + instance['Ports'] = (portBindingsString == "null")? 
"" : portBindingsString + end + else + $log.info("Attempt in ObtainContainerHostConfig to get container: #{container['Id']} host config information returned null") + end + rescue => errorStr + $log.warn("Exception in obtainContainerHostConfig: #{errorStr}") + end + end + + def inspectContainer(id, nameMap) + containerInstance = {} + begin + container = DockerApiClient.dockerInspectContainer(id) + if !container.nil? && !container.empty? + containerInstance['InstanceID'] = container['Id'] + containerInstance['CreatedTime'] = container['Created'] + containerName = container['Name'] + if !containerName.nil? && !containerName.empty? + # Remove the leading / from the name if it exists (this is an API issue) + containerInstance['ElementName'] = (containerName[0] == '/') ? containerName[1..-1] : containerName + end + imageValue = container['Image'] + if !imageValue.nil? && !imageValue.empty? + containerInstance['ImageId'] = imageValue + repoImageTagArray = nameMap[imageValue] + if nameMap.has_key? imageValue + containerInstance['Repository'] = repoImageTagArray[0] + containerInstance['Image'] = repoImageTagArray[1] + containerInstance['ImageTag'] = repoImageTagArray[2] + end + end + obtainContainerConfig(containerInstance, container); + obtainContainerState(containerInstance, container); + obtainContainerHostConfig(containerInstance, container); + end + rescue => errorStr + $log.warn("Exception in inspectContainer: #{errorStr} for container: #{id}") + end + return containerInstance + end + + def enumerate + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + containerInventory = Array.new + $log.info("in_container_inventory::enumerate : Begin processing @ #{Time.now.utc.iso8601}") + hostname = DockerApiClient.getDockerHostName + begin + containerIds = DockerApiClient.listContainers + if !containerIds.empty? 
+ eventStream = MultiEventStream.new + nameMap = DockerApiClient.getImageIdMap + containerIds.each do |containerId| + inspectedContainer = {} + inspectedContainer = inspectContainer(containerId, nameMap) + inspectedContainer['Computer'] = hostname + inspectedContainer['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + containerInventory.push inspectedContainer + ContainerInventoryState.writeContainerState(inspectedContainer) + end + # Update the state for deleted containers + deletedContainers = ContainerInventoryState.getDeletedContainers(containerIds) + if !deletedContainers.nil? && !deletedContainers.empty? + deletedContainers.each do |deletedContainer| + container = ContainerInventoryState.readContainerState(deletedContainer) + if !container.nil? + container.each{|k,v| container[k]=v} + container['State'] = "Deleted" + containerInventory.push container + end + end + end + + containerInventory.each do |record| + wrapper = { + "DataType"=>"CONTAINER_INVENTORY_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper + end + router.emit_stream(@tag, eventStream) if eventStream + @@istestvar = ENV['ISTEST'] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + $log.info("containerInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties['Computer'] = hostname + telemetryProperties['ContainerCount'] = containerInventory.length + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + end + $log.info("in_container_inventory::enumerate : Processing complete - emitted stream @ #{Time.now.utc.iso8601}") + end + rescue => errorStr + $log.warn("Exception in enumerate container inventory: #{errorStr}") + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_container_inventory::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_container_inventory::run_periodic: Failed in enumerate container inventory: #{errorStr}" + end + end + @mutex.lock + end + @mutex.unlock + end + + end # Container_Inventory_Input + +end # module \ No newline at end of file diff --git a/source/code/plugin/lib/application_insights.rb b/source/code/plugin/lib/application_insights.rb new file mode 100644 index 000000000..0a683d484 --- /dev/null +++ b/source/code/plugin/lib/application_insights.rb @@ -0,0 +1,9 @@ +require_relative 'application_insights/telemetry_client' +require_relative 'application_insights/unhandled_exception' +require_relative 'application_insights/version' + +module ApplicationInsights + module Rack + autoload :TrackRequest, "application_insights/rack/track_request" + end +end diff --git a/source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb 
b/source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb new file mode 100644 index 000000000..333f6968b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/asynchronous_queue.rb @@ -0,0 +1,58 @@ +require_relative 'event' +require_relative 'queue_base' + +module ApplicationInsights + module Channel + # An asynchronous queue for use in conjunction with the {AsynchronousSender}. + # The queue will notify the sender that it needs to pick up items when it + # reaches {#max_queue_length}, or when the consumer calls {#flush} via the + # {#flush_notification} event. + # + # @example + # require 'application_insights' + # require 'thread' + # queue = ApplicationInsights::Channel::AsynchronousQueue.new nil + # Thread.new do + # sleep 1 + # queue.push 1 + # queue.flush + # end + # queue.flush_notification.wait + # queue.flush_notification.clear + # result = queue.pop + class AsynchronousQueue < QueueBase + # Initializes a new instance of the class. + # @param [SenderBase] sender the sender object that will be used in + # conjunction with this queue. In addition to the sender object must + # support a {AsynchronousSender#start} method which is invoked each time + # an item is pushed to the queue as well as use the {#flush_notification} + # event. + def initialize(sender) + @flush_notification = Event.new + super sender + end + + # The flush notification {ApplicationInsights::Channel::Event} that the {#sender} + # will use to get notified that a flush is needed. + # @return [Event] object that the {#sender} can wait on. + attr_reader :flush_notification + + # Adds the passed in item object to the queue and notifies the {#sender} + # to start an asynchronous send operation + # by calling {AsynchronousSender#start}. + # @param [Contracts::Envelope] item the telemetry envelope object to send + # to the service. 
+ def push(item) + super item + @sender.start if @sender + end + + # Flushes the current queue by notifying the {#sender} via the + # {#flush_notification} event. + def flush + @flush_notification.set + @sender.start if @sender + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb b/source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb new file mode 100644 index 000000000..da573f08c --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/asynchronous_sender.rb @@ -0,0 +1,133 @@ +require_relative 'sender_base' +require 'thread' + +module ApplicationInsights + module Channel + # An asynchronous sender that works in conjunction with the {AsynchronousQueue}. + # The sender object will start a worker thread that will pull items from the + # {#queue}. The thread will be created when the client calls {#start} and + # will check for queue items every {#send_interval} seconds. The worker thread + # can also be forced to check the queue by setting the + # {AsynchronousQueue#flush_notification} event. + # + # - If no items are found, the thread will go back to sleep. + # - If items are found, the worker thread will send items to the specified + # service in batches of {#send_buffer_size}. + # + # If no queue items are found for {#send_time} seconds, the worker thread + # will shut down (and {#start} will need to be called again). + class AsynchronousSender < SenderBase + SERVICE_ENDPOINT_URI = 'https://dc.services.visualstudio.com/v2/track' + # Initializes a new instance of the class. + # @param [String] service_endpoint_uri the address of the service to send + # telemetry data to. 
+ def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI) + @send_interval = 1.0 + @send_remaining_time = 0 + @send_time = 3.0 + @lock_work_thread = Mutex.new + @work_thread = nil + @start_notification_processed = true + super service_endpoint_uri + end + + # The time span in seconds at which the the worker thread will check the + # {#queue} for items (defaults to: 1.0). + # @return [Fixnum] the interval in seconds. + attr_accessor :send_interval + + # The time span in seconds for which the worker thread will stay alive if + # no items are found in the {#queue} (defaults to 3.0). + # @return [Fixnum] the interval in seconds. + attr_accessor :send_time + + # The worker thread which checks queue items and send data every + # (#send_interval) seconds or upon flush. + # @return [Thread] the work thread + attr_reader :work_thread + + # Calling this method will create a worker thread that checks the {#queue} + # every {#send_interval} seconds for a total duration of {#send_time} + # seconds for new items. If a worker thread has already been created, + # calling this method does nothing. + def start + @start_notification_processed = false + # Maintain one working thread at one time + unless @work_thread + @lock_work_thread.synchronize do + unless @work_thread + local_send_interval = [@send_interval, 0.1].max + @send_remaining_time = [@send_time, local_send_interval].max + @work_thread = Thread.new { run } + @work_thread.abort_on_exception = false + end + end + end + end + + private + + def run + # save the queue locally + local_queue = @queue + if local_queue.nil? 
+ @work_thread = nil + return + end + + begin + # fix up the send interval (can't be lower than 100ms) + local_send_interval = [@send_interval, 0.1].max + + while true + @start_notification_processed = true + while true + # get at most @send_buffer_size items from the queue + data = [] + @send_buffer_size.downto(1) do + item = local_queue.pop + break if not item + data.push item + end + + # if we didn't get any items from the queue, we're done here + break if data.length == 0 + + # reset the send time + @send_remaining_time = @send_time + + # finally send the data + send data + end + + # wait at most @send_interval ms (or until we get signalled) + result = local_queue.flush_notification.wait local_send_interval + if result + local_queue.flush_notification.clear + next + end + + # decrement the remaining time + @send_remaining_time -= local_send_interval + # If remaining time <=0 and there is no start notification unprocessed, + # then stop the working thread + if @send_remaining_time <= 0 && @start_notification_processed + # Note: there is still a chance some start notification could be + # missed, e.g., the start method got triggered between the above and + # following line. However the data is not lost as it would be + # processed later when next start notification comes after the worker + # thread stops. The cost to ensure no notification miss is high where + # a lock is required each time the start method calls. 
+ @work_thread = nil + break + end + end + rescue Exception => e + # Make sure work_thread sets to nil when it terminates abnormally + @work_thread = nil + @logger.error('application_insights') { "Asynchronous sender work thread terminated abnormally: #{e.to_s}" } + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/application.rb b/source/code/plugin/lib/application_insights/channel/contracts/application.rb new file mode 100644 index 000000000..071c37385 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/application.rb @@ -0,0 +1,13 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Application + include JsonSerializable + + attr_accessor :ver + + attribute_mapping( + ver: 'ai.application.ver' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb new file mode 100644 index 000000000..d560dd15b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/availability_data.rb @@ -0,0 +1,34 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class AvailabilityData + include JsonSerializable + + attr_accessor :ver, :id, :name, :duration, :success, :run_location, :message, + :properties, :measurements + + attribute_mapping( + ver: 'ver', + id: 'id', + name: 'name', + duration: 'duration', + success: 'success', + run_location: 'runLocation', + message: 'message', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/base.rb b/source/code/plugin/lib/application_insights/channel/contracts/base.rb new file mode 100644 index 
000000000..bb88a4625 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/base.rb @@ -0,0 +1,13 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Base + include JsonSerializable + + attr_accessor :base_type + + attribute_mapping( + base_type: 'baseType' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/cloud.rb b/source/code/plugin/lib/application_insights/channel/contracts/cloud.rb new file mode 100644 index 000000000..5aaeeee04 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/cloud.rb @@ -0,0 +1,14 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Cloud + include JsonSerializable + + attr_accessor :role, :role_instance + + attribute_mapping( + role: 'ai.cloud.role', + role_instance: 'ai.cloud.roleInstance' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/data.rb b/source/code/plugin/lib/application_insights/channel/contracts/data.rb new file mode 100644 index 000000000..c7184edfd --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/data.rb @@ -0,0 +1,14 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Data + include JsonSerializable + + attr_accessor :base_type, :base_data + + attribute_mapping( + base_type: 'baseType', + base_data: 'baseData' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/data_point.rb b/source/code/plugin/lib/application_insights/channel/contracts/data_point.rb new file mode 100644 index 000000000..6556b351b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/data_point.rb @@ -0,0 +1,25 @@ +require_relative 'json_serializable' +require_relative 'data_point_type' + +module ApplicationInsights::Channel::Contracts + class DataPoint + include JsonSerializable + + 
attr_accessor :ns, :name, :kind, :value, :count, :min, :max, :std_dev + + attribute_mapping( + ns: 'ns', + name: 'name', + kind: 'kind', + value: 'value', + count: 'count', + min: 'min', + max: 'max', + std_dev: 'stdDev' + ) + + def kind + @kind ||= DataPointType::MEASUREMENT + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb b/source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb new file mode 100644 index 000000000..f9816e4a9 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/data_point_type.rb @@ -0,0 +1,7 @@ +module ApplicationInsights::Channel::Contracts + class DataPointType + MEASUREMENT = 0 + + AGGREGATION = 1 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb b/source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb new file mode 100644 index 000000000..38a441499 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/dependency_kind.rb @@ -0,0 +1,9 @@ +module ApplicationInsights::Channel::Contracts + class DependencyKind + SQL = 0 + + HTTP = 1 + + OTHER = 2 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb b/source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb new file mode 100644 index 000000000..a68dad72b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/dependency_source_type.rb @@ -0,0 +1,9 @@ +module ApplicationInsights::Channel::Contracts + class DependencySourceType + UNDEFINED = 0 + + AIC = 1 + + APMC = 2 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/device.rb b/source/code/plugin/lib/application_insights/channel/contracts/device.rb new file mode 100644 index 000000000..af6855102 --- /dev/null +++ 
b/source/code/plugin/lib/application_insights/channel/contracts/device.rb @@ -0,0 +1,18 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Device + include JsonSerializable + + attr_accessor :id, :locale, :model, :oem_name, :os_version, :type + + attribute_mapping( + id: 'ai.device.id', + locale: 'ai.device.locale', + model: 'ai.device.model', + oem_name: 'ai.device.oemName', + os_version: 'ai.device.osVersion', + type: 'ai.device.type' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/domain.rb b/source/code/plugin/lib/application_insights/channel/contracts/domain.rb new file mode 100644 index 000000000..8a7ba880d --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/domain.rb @@ -0,0 +1,10 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Domain + include JsonSerializable + + attribute_mapping( + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/envelope.rb b/source/code/plugin/lib/application_insights/channel/contracts/envelope.rb new file mode 100644 index 000000000..b8608e388 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/envelope.rb @@ -0,0 +1,32 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Envelope + include JsonSerializable + + attr_accessor :ver, :name, :time, :sample_rate, :seq, :i_key, :tags, :data + + attribute_mapping( + ver: 'ver', + name: 'name', + time: 'time', + sample_rate: 'sampleRate', + seq: 'seq', + i_key: 'iKey', + tags: 'tags', + data: 'data' + ) + + def ver + @ver ||= 1 + end + + def sample_rate + @sample_rate ||= 100.0 + end + + def tags + @tags ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/event_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/event_data.rb new file mode 100644 
index 000000000..4bfb16124 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/event_data.rb @@ -0,0 +1,28 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class EventData + include JsonSerializable + + attr_accessor :ver, :name, :properties, :measurements + + attribute_mapping( + ver: 'ver', + name: 'name', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb new file mode 100644 index 000000000..5cffd1253 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/exception_data.rb @@ -0,0 +1,35 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class ExceptionData + include JsonSerializable + + attr_accessor :ver, :exceptions, :severity_level, :problem_id, :properties, + :measurements + + attribute_mapping( + ver: 'ver', + exceptions: 'exceptions', + severity_level: 'severityLevel', + problem_id: 'problemId', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def exceptions + @exceptions ||= [] + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb b/source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb new file mode 100644 index 000000000..85bfc6282 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/exception_details.rb @@ -0,0 +1,28 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class ExceptionDetails + 
include JsonSerializable + + attr_accessor :id, :outer_id, :type_name, :message, :has_full_stack, :stack, + :parsed_stack + + attribute_mapping( + id: 'id', + outer_id: 'outerId', + type_name: 'typeName', + message: 'message', + has_full_stack: 'hasFullStack', + stack: 'stack', + parsed_stack: 'parsedStack' + ) + + def has_full_stack + @has_full_stack.nil? ? true : @has_full_stack + end + + def parsed_stack + @parsed_stack ||= [] + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/internal.rb b/source/code/plugin/lib/application_insights/channel/contracts/internal.rb new file mode 100644 index 000000000..6e8f3d300 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/internal.rb @@ -0,0 +1,15 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Internal + include JsonSerializable + + attr_accessor :sdk_version, :agent_version, :node_name + + attribute_mapping( + sdk_version: 'ai.internal.sdkVersion', + agent_version: 'ai.internal.agentVersion', + node_name: 'ai.internal.nodeName' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb new file mode 100644 index 000000000..8f4677044 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/json_serializable.rb @@ -0,0 +1,59 @@ +require 'json' + +module ApplicationInsights + module Channel + module Contracts + module JsonSerializable + module ClassMethods + attr_reader :json_mappings + + def attribute_mapping(mappings = {}) + @json_mappings = mappings + end + end + + def self.included(klass) + klass.extend JsonSerializable::ClassMethods + end + + def initialize(attributes = {}) + attributes.each { |k, v| send(:"#{k}=", v) } + end + + def to_h + output = {} + klass = self.class + + klass.json_mappings.each do |attr, name| + value = visit 
self.send(attr) + is_empty = value.respond_to?(:empty?) && value.empty? + + output[name] = value unless value.nil? || is_empty + end + + output + end + + def to_json(args = {}) + JSON.generate self.to_h, args + end + + private + + def visit(object) + return if object.nil? + + if object.is_a? Array + object.map { |e| visit e } + elsif object.is_a? Hash + Hash[object.map { |k, v| [k, visit(v)] }] + elsif object.respond_to? :to_h + object.to_h + else + object + end + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/location.rb b/source/code/plugin/lib/application_insights/channel/contracts/location.rb new file mode 100644 index 000000000..4136c869b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/location.rb @@ -0,0 +1,13 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Location + include JsonSerializable + + attr_accessor :ip + + attribute_mapping( + ip: 'ai.location.ip' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/message_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/message_data.rb new file mode 100644 index 000000000..1340f5ba7 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/message_data.rb @@ -0,0 +1,24 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class MessageData + include JsonSerializable + + attr_accessor :ver, :message, :severity_level, :properties + + attribute_mapping( + ver: 'ver', + message: 'message', + severity_level: 'severityLevel', + properties: 'properties' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb new file mode 100644 index 000000000..bcb5739d6 --- 
/dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/metric_data.rb @@ -0,0 +1,27 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class MetricData + include JsonSerializable + + attr_accessor :ver, :metrics, :properties + + attribute_mapping( + ver: 'ver', + metrics: 'metrics', + properties: 'properties' + ) + + def ver + @ver ||= 2 + end + + def metrics + @metrics ||= [] + end + + def properties + @properties ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/operation.rb b/source/code/plugin/lib/application_insights/channel/contracts/operation.rb new file mode 100644 index 000000000..c86dd111b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/operation.rb @@ -0,0 +1,17 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Operation + include JsonSerializable + + attr_accessor :id, :name, :parent_id, :synthetic_source, :correlation_vector + + attribute_mapping( + id: 'ai.operation.id', + name: 'ai.operation.name', + parent_id: 'ai.operation.parentId', + synthetic_source: 'ai.operation.syntheticSource', + correlation_vector: 'ai.operation.correlationVector' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb new file mode 100644 index 000000000..d17dd2f79 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/page_view_data.rb @@ -0,0 +1,33 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class PageViewData + include JsonSerializable + + attr_accessor :ver, :url, :name, :duration, :id, :referrer_uri, :properties, + :measurements + + attribute_mapping( + ver: 'ver', + url: 'url', + name: 'name', + duration: 'duration', + id: 'id', + referrer_uri: 'referrerUri', + 
properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb new file mode 100644 index 000000000..adde3f3ad --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/page_view_perf_data.rb @@ -0,0 +1,39 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class PageViewPerfData + include JsonSerializable + + attr_accessor :ver, :url, :perf_total, :name, :duration, :network_connect, + :sent_request, :received_response, :id, :dom_processing, :referrer_uri, + :properties, :measurements + + attribute_mapping( + ver: 'ver', + url: 'url', + perf_total: 'perfTotal', + name: 'name', + duration: 'duration', + network_connect: 'networkConnect', + sent_request: 'sentRequest', + received_response: 'receivedResponse', + id: 'id', + dom_processing: 'domProcessing', + referrer_uri: 'referrerUri', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb new file mode 100644 index 000000000..a238841f6 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/remote_dependency_data.rb @@ -0,0 +1,40 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class RemoteDependencyData + include JsonSerializable + + attr_accessor :ver, :name, :id, :result_code, :duration, :success, :data, + :target, :type, :properties, 
:measurements + + attribute_mapping( + ver: 'ver', + name: 'name', + id: 'id', + result_code: 'resultCode', + duration: 'duration', + success: 'success', + data: 'data', + target: 'target', + type: 'type', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def success + @success.nil? ? true : @success + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb b/source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb new file mode 100644 index 000000000..394bf8afb --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/reopenings.rb @@ -0,0 +1,27 @@ +module ApplicationInsights::Channel::Contracts + class ExceptionData + def handled_at + @properties["handledAt"] if @properties + end + + def handled_at=(handled_at) + if handled_at + @properties ||= {} + @properties["handledAt"] = handled_at + end + end + end + + class RequestData + def http_method + @properties["httpMethod"] if @properties + end + + def http_method=(http_method) + if http_method + @properties ||= {} + @properties["httpMethod"] = http_method + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/lib/application_insights/channel/contracts/request_data.rb b/source/code/plugin/lib/application_insights/channel/contracts/request_data.rb new file mode 100644 index 000000000..af2581c2b --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/request_data.rb @@ -0,0 +1,35 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class RequestData + include JsonSerializable + + attr_accessor :ver, :id, :source, :name, :duration, :response_code, :success, + :url, :properties, :measurements + + attribute_mapping( + ver: 'ver', + id: 'id', + source: 'source', + name: 'name', + duration: 
'duration', + response_code: 'responseCode', + success: 'success', + url: 'url', + properties: 'properties', + measurements: 'measurements' + ) + + def ver + @ver ||= 2 + end + + def properties + @properties ||= {} + end + + def measurements + @measurements ||= {} + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/session.rb b/source/code/plugin/lib/application_insights/channel/contracts/session.rb new file mode 100644 index 000000000..a761c51c5 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/session.rb @@ -0,0 +1,14 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class Session + include JsonSerializable + + attr_accessor :id, :is_first + + attribute_mapping( + id: 'ai.session.id', + is_first: 'ai.session.isFirst' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb b/source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb new file mode 100644 index 000000000..322a00ec3 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/severity_level.rb @@ -0,0 +1,13 @@ +module ApplicationInsights::Channel::Contracts + class SeverityLevel + VERBOSE = 0 + + INFORMATION = 1 + + WARNING = 2 + + ERROR = 3 + + CRITICAL = 4 + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb b/source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb new file mode 100644 index 000000000..b4f4b9844 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/stack_frame.rb @@ -0,0 +1,17 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class StackFrame + include JsonSerializable + + attr_accessor :level, :method, :assembly, :file_name, :line + + attribute_mapping( + level: 'level', + method: 'method', + assembly: 'assembly', + file_name: 'fileName', 
+ line: 'line' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/contracts/user.rb b/source/code/plugin/lib/application_insights/channel/contracts/user.rb new file mode 100644 index 000000000..a7ff8a7cf --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/contracts/user.rb @@ -0,0 +1,15 @@ +require_relative 'json_serializable' + +module ApplicationInsights::Channel::Contracts + class User + include JsonSerializable + + attr_accessor :account_id, :id, :auth_user_id + + attribute_mapping( + account_id: 'ai.user.accountId', + id: 'ai.user.id', + auth_user_id: 'ai.user.authUserId' + ) + end +end diff --git a/source/code/plugin/lib/application_insights/channel/event.rb b/source/code/plugin/lib/application_insights/channel/event.rb new file mode 100644 index 000000000..ae61064f8 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/event.rb @@ -0,0 +1,68 @@ +require_relative 'queue_base' +require 'thread' + +module ApplicationInsights + module Channel + # An event class that allows simple cross-thread signalling. + # + # An object of this type managers an internal flag that can be set to true + # via the {#set} method and reset via the {#clear} method. Calling the + # {#wait} method will block until the flag is set to true. + # + # @example + # require 'application_insights' + # require 'thread' + # event = ApplicationInsights::Channel::Event.new + # Thread.new do + # sleep 1 + # event.set + # end + # puts 'Main screen turn on.' + # result = event.wait + # puts 'All your base are belong to us.' + class Event + # Initializes a new instance of the class. + def initialize + @mutex = Mutex.new + @condition_variable = ConditionVariable.new + @signal = false + end + + # The signal value for this object. Note that the value of this property is + # not synchronized with respect to {#set} and {#clear} meaning that it + # could return false positives or negatives. + # @return [Boolean] the signal value. 
+ attr_reader :signal + + # Sets the internal flag to true. Calling this method will also cause all + # waiting threads to awaken. + def set + @mutex.synchronize do + @signal = true + @condition_variable.broadcast + end + end + + # Sets the internal flag to false. + def clear + @mutex.synchronize do + @signal = false + end + end + + # Calling this method will block until the internal flag is set to true. + # If the flag is set to true before calling this method, we will return + # immediately. If the timeout parameter is specified, the method will + # unblock after the specified number of seconds. + # @param [Fixnum] timeout the timeout for the operation in seconds. + # @return [Boolean] the value of the internal flag on exit. + def wait(timeout=nil) + @mutex.synchronize do + @condition_variable.wait(@mutex, timeout) unless @signal + end + + @signal + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/queue_base.rb b/source/code/plugin/lib/application_insights/channel/queue_base.rb new file mode 100644 index 000000000..91226b17f --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/queue_base.rb @@ -0,0 +1,73 @@ +require 'thread' + +module ApplicationInsights + module Channel + # The base class for all types of queues for use in conjunction with an + # implementation of {SenderBase}. The queue will notify the sender that it + # needs to pick up items when it reaches {#max_queue_length}, or when the + # consumer calls {#flush}. + class QueueBase + # Initializes a new instance of the class. + # @param [SenderBase] sender the sender object that will be used in + # conjunction with this queue. + def initialize(sender) + @queue = Queue.new + @max_queue_length = 500 + self.sender = sender + end + + # The maximum number of items that will be held by the queue before the + # queue will call the {#flush} method. + # @return [Fixnum] the maximum queue size. 
(defaults to: 500) + attr_accessor :max_queue_length + + # The sender that is associated with this queue that this queue will use to + # send data to the service. + # @return [SenderBase] the sender object. + attr_reader :sender + + # Change the sender that is associated with this queue. + # @param [SenderBase] sender the sender object. + # @return [SenderBase] the sender object. + def sender=(sender) + @sender = sender + @sender.queue = self if sender + @sender + end + + # Adds the passed in item object to the queue and calls {#flush} if the + # size of the queue is larger than {#max_queue_length}. This method does + # nothing if the passed in item is nil. + # @param [Contracts::Envelope] item the telemetry envelope object to send + # to the service. + def push(item) + return unless item + + @queue.push(item) + + flush if @queue.length >= @max_queue_length + end + + # Pops a single item from the queue and returns it. If the queue is empty, + # this method will return nil. + # @return [Contracts::Envelope] a telemetry envelope object or nil if the + # queue is empty. + def pop + return @queue.pop(true) + rescue ThreadError + return nil + end + + # Flushes the current queue by notifying the {#sender}. This method needs + # to be overridden by a concrete implementations of the queue class. + def flush + end + + # Indicates whether the queue is empty. + # @return [Boolean] true if the queue is empty + def empty? + @queue.empty? 
+ end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/sender_base.rb b/source/code/plugin/lib/application_insights/channel/sender_base.rb new file mode 100644 index 000000000..2431bf748 --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/sender_base.rb @@ -0,0 +1,88 @@ +require 'json' +require 'net/http' +require 'openssl' +require 'stringio' +require 'zlib' +require 'logger' + +module ApplicationInsights + module Channel + # The base class for all types of senders for use in conjunction with an + # implementation of {QueueBase}. The queue will notify the sender that it + # needs to pick up items. The concrete sender implementation will listen to + # these notifications and will pull items from the queue using + # {QueueBase#pop} getting at most {#send_buffer_size} items. + # It will then call {#send} using the list of items pulled from the queue. + class SenderBase + # Initializes a new instance of the class. + # @param [String] service_endpoint_uri the address of the service to send + # telemetry data to. + def initialize(service_endpoint_uri) + @service_endpoint_uri = service_endpoint_uri + @queue = nil + @send_buffer_size = 100 + @logger = Logger.new(STDOUT) + end + + # The service endpoint URI where this sender will send data to. + # @return [String] the service endpoint URI. + attr_accessor :service_endpoint_uri + + # The queue that this sender is draining. While {SenderBase} doesn't + # implement any means of doing so, derivations of this class do. + # @return [QueueBase] the queue instance that this sender is draining. + attr_accessor :queue + + # The buffer size for a single batch of telemetry. This is the maximum number + # of items in a single service request that this sender is going to send. + # @return [Fixnum] the maximum number of items in a telemetry batch. + attr_accessor :send_buffer_size + + # The logger for the sender. 
+ attr_accessor :logger + + # Immediately sends the data passed in to {#service_endpoint_uri}. If the + # service request fails, the passed in items are pushed back to the {#queue}. + # @param [Array] data_to_send an array of + # {Contracts::Envelope} objects to send to the service. + def send(data_to_send) + uri = URI(@service_endpoint_uri) + headers = { + 'Accept' => 'application/json', + 'Content-Type' => 'application/json; charset=utf-8', + 'Content-Encoding' => 'gzip' + } + request = Net::HTTP::Post.new(uri.path, headers) + + # Use JSON.generate instead of to_json, otherwise it will + # default to ActiveSupport::JSON.encode for Rails app + json = JSON.generate(data_to_send) + compressed_data = compress(json) + request.body = compressed_data + + http = Net::HTTP.new uri.hostname, uri.port + if uri.scheme.downcase == 'https' + http.use_ssl = true + http.verify_mode = OpenSSL::SSL::VERIFY_NONE + end + + response = http.request(request) + http.finish if http.started? + + if !response.kind_of? Net::HTTPSuccess + @logger.warn('application_insights') { "Failed to send data: #{response.message}" } + end + end + + private + + def compress(string) + wio = StringIO.new("w") + w_gz = Zlib::GzipWriter.new wio, nil, nil + w_gz.write(string) + w_gz.close + wio.string + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/synchronous_queue.rb b/source/code/plugin/lib/application_insights/channel/synchronous_queue.rb new file mode 100644 index 000000000..13c2281ac --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/synchronous_queue.rb @@ -0,0 +1,45 @@ +require_relative 'queue_base' + +module ApplicationInsights + module Channel + # A synchronous queue for use in conjunction with the {SynchronousSender}. + # The queue will call {SenderBase#send} when it reaches {#max_queue_length}, + # or when the consumer calls {#flush}. 
+ # + # @example + # require 'application_insights' + # require 'thread' + # queue = ApplicationInsights::Channel::SynchronousQueue.new nil + # queue.max_queue_length = 1 + # queue.push 1 + class SynchronousQueue < QueueBase + # Initializes a new instance of the class. + # @param [SenderBase] sender the sender object that will be used in + # conjunction with this queue. + def initialize(sender) + super sender + end + + # Flushes the current queue by by calling {#sender}'s + # {SenderBase#send} method. + def flush + local_sender = @sender + return unless local_sender + + while true + # get at most send_buffer_size items and send them + data = [] + while data.length < local_sender.send_buffer_size + item = pop() + break if not item + data.push item + end + + break if data.length == 0 + + local_sender.send(data) + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/synchronous_sender.rb b/source/code/plugin/lib/application_insights/channel/synchronous_sender.rb new file mode 100644 index 000000000..ade2f086c --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/synchronous_sender.rb @@ -0,0 +1,17 @@ +require_relative 'sender_base' + +module ApplicationInsights + module Channel + # A synchronous sender that works in conjunction with the {SynchronousQueue}. + # The queue will call {#send} on the current instance with the data to send. + class SynchronousSender < SenderBase + SERVICE_ENDPOINT_URI = 'https://dc.services.visualstudio.com/v2/track' + # Initializes a new instance of the class. + # @param [String] service_endpoint_uri the address of the service to send + # telemetry data to. 
+ def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI) + super service_endpoint_uri + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/telemetry_channel.rb b/source/code/plugin/lib/application_insights/channel/telemetry_channel.rb new file mode 100644 index 000000000..e026ebf7d --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/telemetry_channel.rb @@ -0,0 +1,131 @@ +require 'time' +require_relative 'asynchronous_queue' +require_relative 'asynchronous_sender' +require_relative 'telemetry_context' +require_relative 'synchronous_queue' +require_relative 'synchronous_sender' +require_relative 'contracts/envelope' +require_relative 'contracts/data' +require_relative 'contracts/internal' +require_relative '../../application_insights/version' + +module ApplicationInsights + module Channel + # The telemetry channel is responsible for constructing a + # {Contracts::Envelope} object from the passed in data and specified + # telemetry context. + # + # @example + # require 'application_insights' + # channel = ApplicationInsights::Channel::TelemetryChannel.new + # event = ApplicationInsights::Channel::Contracts::EventData.new name: 'My event' + # channel.write event + class TelemetryChannel + # Initializes a new instance of the class. + # @param [TelemetryContext] context the telemetry context to use when + # sending telemetry data. + # @param [QueueBase] queue the queue to enqueue the resulting + # {Contracts::Envelope} to. + def initialize(context=nil, queue=nil) + @context = context || TelemetryContext.new + @queue = queue || SynchronousQueue.new(SynchronousSender.new) + end + + # The context associated with this channel. All {Contracts::Envelope} + # objects created by this channel will use this value if it's present or if + # none is specified as part of the {#write} call. 
+ # @return [TelemetryContext] the context instance + # (defaults to: TelemetryContext.new) + attr_reader :context + + # The queue associated with this channel. All {Contracts::Envelope} objects + # created by this channel will be pushed to this queue. + # @return [QueueBase] the queue instance (defaults to: SynchronousQueue.new) + attr_reader :queue + + # The sender associated with this channel. This instance will be used to + # transmit telemetry to the service. + # @return [SenderBase] the sender instance (defaults to: SynchronousSender.new) + def sender + @queue.sender + end + + # Flushes the enqueued data by calling {QueueBase#flush}. + def flush + @queue.flush + end + + # Enqueues the passed in data to the {#queue}. If the caller specifies a + # context as well, it will take precedence over the instance in {#context}. + # @param [Object] data the telemetry data to send. This will be wrapped in + # an {Contracts::Envelope} before being enqueued to the {#queue}. + # @param [TelemetryContext] context the override context to use when + # constructing the {Contracts::Envelope}. + # @param [Time|String] time the timestamp of the telemetry used to construct the + # {Contracts::Envelope}. + def write(data, context=nil, time=nil) + local_context = context || @context + raise ArgumentError, 'Context was required but not provided' unless local_context + + if time && time.is_a?(String) + local_time = time + elsif time && time.is_a?(Time) + local_time = time.iso8601(7) + else + local_time = Time.now.iso8601(7) + end + + data_type = data.class.name.gsub(/^.*::/, '') + set_properties data, local_context + data_attributes = { + :base_type => data_type, + :base_data => data + } + envelope_attributes = { + :name => 'Microsoft.ApplicationInsights.' 
+ data_type[0..-5], + :time => local_time, + :i_key => local_context.instrumentation_key, + :tags => get_tags(local_context), + :data => Contracts::Data.new(data_attributes) + } + envelope = Contracts::Envelope.new envelope_attributes + @queue.push(envelope) + end + + private + + def get_tags(context) + hash = {} + internal_context_attributes = { + :sdk_version => 'rb:' + ApplicationInsights::VERSION + } + internal_context = Contracts::Internal.new internal_context_attributes + + [internal_context, + context.application, + context.cloud, + context.device, + context.user, + context.session, + context.location, + context.operation].each { |c| hash.merge!(c.to_h) if c } + + hash.delete_if { |k, v| v.nil? } + + hash + end + + def set_properties(data, context) + if context.properties + properties = data.properties || {} + context.properties.each do |key, value| + unless properties.key?(key) + properties[key] = value + end + end + data.properties = properties + end + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/channel/telemetry_context.rb b/source/code/plugin/lib/application_insights/channel/telemetry_context.rb new file mode 100644 index 000000000..bb24af24e --- /dev/null +++ b/source/code/plugin/lib/application_insights/channel/telemetry_context.rb @@ -0,0 +1,85 @@ +require_relative 'contracts/application' +require_relative 'contracts/cloud' +require_relative 'contracts/device' +require_relative 'contracts/user' +require_relative 'contracts/session' +require_relative 'contracts/operation' +require_relative 'contracts/location' + +module ApplicationInsights + module Channel + # Represents the context for sending telemetry to the + # Application Insights service. 
+ # + # @example + # require 'application_insights' + # context = ApplicationInsights::Channel::TelemetryContext.new + # context.instrumentation_key = '' + # context.application.id = 'My application' + # context.application.ver = '1.2.3' + # context.device.id = 'My current device' + # context.device.oem_name = 'Asus' + # context.device.model = 'X31A' + # context.device.type = "Other" + # context.user.id = 'santa@northpole.net' + class TelemetryContext + # Initializes a new instance of the class. + def initialize + @instrumentation_key = nil + @application = Contracts::Application.new + @cloud = Contracts::Cloud.new + @device = Contracts::Device.new + @user = Contracts::User.new + @session = Contracts::Session.new + @operation = Contracts::Operation.new + @location = Contracts::Location.new + @properties = {} + end + + # The instrumentation key that is used to identify which + # Application Insights application this data is for. + # @return [String] the instrumentation key. + attr_accessor :instrumentation_key + + # The application context. This contains properties of the + # application you are running. + # @return [Contracts::Application] the context object. + attr_accessor :application + + # The cloud context. This contains properties of the + # cloud role you are generating telemetry for. + # @return [Contracts::Cloud] the context object. + attr_accessor :cloud + + # The device context. This contains properties of the + # device you are running on. + # @return [Contracts::Device] the context object. + attr_accessor :device + + # The user context. This contains properties of the + # user you are generating telemetry for. + # @return [Contracts::User] the context object. + attr_accessor :user + + # The session context. This contains properties of the + # session you are generating telemetry for. + # @return [Contracts::Session] the context object. + attr_accessor :session + + # The operation context. 
This contains properties of the + # operation you are generating telemetry for. + # @return [Contracts::Operation] the context object. + attr_accessor :operation + + # The location context. This contains properties of the + # location you are generating telemetry from. + # @return [Contracts::Location] the context object. + attr_accessor :location + + # The property context. This contains free-form properties + # that you can add to your telemetry. + # @return [Hash] the context object. + attr_accessor :properties + end + end +end diff --git a/source/code/plugin/lib/application_insights/rack/track_request.rb b/source/code/plugin/lib/application_insights/rack/track_request.rb new file mode 100644 index 000000000..62c2b0844 --- /dev/null +++ b/source/code/plugin/lib/application_insights/rack/track_request.rb @@ -0,0 +1,154 @@ +require 'rack' +require 'securerandom' +require_relative '../channel/contracts/request_data' +require_relative '../telemetry_client' + +module ApplicationInsights + module Rack + # Track every request and sends the request data to Application Insights. + class TrackRequest + # Initializes a new instance of the class. + # @param [Object] app the inner rack application. + # @param [String] instrumentation_key to identify which Application Insights + # application this data is for. + # @param [Fixnum] buffer_size the buffer size and the buffered requests would + # send to Application Insights when buffer is full. + # @param [Fixnum] send_interval the frequency (in seconds) to check buffer + # and send buffered requests to Application Insights if any. 
+ def initialize(app, instrumentation_key, buffer_size = 500, send_interval = 60) + @app = app + @instrumentation_key = instrumentation_key + @buffer_size = buffer_size + @send_interval = send_interval + + @sender = Channel::AsynchronousSender.new + @sender.send_interval = @send_interval + queue = Channel::AsynchronousQueue.new @sender + queue.max_queue_length = @buffer_size + @channel = Channel::TelemetryChannel.new nil, queue + + @client = TelemetryClient.new @instrumentation_key, @channel + end + + # Track requests and send data to Application Insights asynchronously. + # @param [Hash] env the rack environment. + def call(env) + # Build a request ID, incorporating one from our request if one exists. + request_id = request_id_header(env['HTTP_REQUEST_ID']) + env['ApplicationInsights.request.id'] = request_id + + start = Time.now + begin + status, headers, response = @app.call(env) + rescue Exception => ex + status = 500 + exception = ex + end + stop = Time.now + + start_time = start.iso8601(7) + duration = format_request_duration(stop - start) + success = status.to_i < 400 + + request = ::Rack::Request.new env + options = options_hash(request) + + data = request_data(request_id, start_time, duration, status, success, options) + context = telemetry_context(request_id, env['HTTP_REQUEST_ID']) + + @client.channel.write data, context, start_time + + if exception + @client.track_exception exception, handled_at: 'Unhandled' + raise exception + end + + [status, headers, response] + end + + private + + def sender=(sender) + if sender.is_a? Channel::AsynchronousSender + @sender = sender + @client.channel.queue.sender = @sender + end + end + + def client + @client + end + + def format_request_duration(duration_seconds) + if duration_seconds >= 86400 + # just return 1 day when it takes more than 1 day which should not happen for requests. 
+ return "%02d.%02d:%02d:%02d.%07d" % [1, 0, 0, 0, 0] + end + + Time.at(duration_seconds).gmtime.strftime("00.%H:%M:%S.%7N") + end + + def request_id_header(request_id) + valid_request_id_header = valid_request_id(request_id) + + length = valid_request_id_header ? 5 : 10 + id = SecureRandom.base64(length) + + if valid_request_id_header + request_id_has_end = %w[. _].include?(request_id[-1]) + request_id << '.' unless request_id_has_end + + return "#{request_id}#{id}_" + end + + "|#{id}." + end + + def valid_request_id(request_id) + request_id && request_id[0] == '|' + end + + def operation_id(id) + # Returns the root ID from the '|' to the first '.' if any. + root_start = id[0] == '|' ? 1 : 0 + + root_end = id.index('.') + root_end = root_end ? root_end - 1 : id.length - root_start + + id[root_start..root_end] + end + + def options_hash(request) + { + name: "#{request.request_method} #{request.path}", + http_method: request.request_method, + url: request.url + } + end + + def request_data(request_id, start_time, duration, status, success, options) + Channel::Contracts::RequestData.new( + :id => request_id || 'Null', + :duration => duration || '0:00:00:00.0000000', + :response_code => status || 200, + :success => success == nil ? 
true : success, + :name => options[:name], + :url => options[:url], + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {}, + # Must initialize http_method after properties because it's actually stored in properties + :http_method => options[:http_method] + ) + end + + def telemetry_context(request_id, request_id_header) + context = Channel::TelemetryContext.new + context.instrumentation_key = @instrumentation_key + context.operation.id = operation_id(request_id) + context.operation.parent_id = request_id_header + + context + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/telemetry_client.rb b/source/code/plugin/lib/application_insights/telemetry_client.rb new file mode 100644 index 000000000..bd066ae70 --- /dev/null +++ b/source/code/plugin/lib/application_insights/telemetry_client.rb @@ -0,0 +1,232 @@ +require_relative 'channel/telemetry_context' +require_relative 'channel/telemetry_channel' +require_relative 'channel/contracts/page_view_data' +require_relative 'channel/contracts/remote_dependency_data' +require_relative 'channel/contracts/exception_data' +require_relative 'channel/contracts/exception_details' +require_relative 'channel/contracts/event_data' +require_relative 'channel/contracts/data_point' +require_relative 'channel/contracts/data_point_type' +require_relative 'channel/contracts/metric_data' +require_relative 'channel/contracts/message_data' +require_relative 'channel/contracts/stack_frame' +require_relative 'channel/contracts/request_data' +require_relative 'channel/contracts/severity_level' +require_relative 'channel/contracts/reopenings' + +module ApplicationInsights + # The telemetry client used for sending all types of telemetry. It serves as + # the main entry point for interacting with the Application Insights service. + class TelemetryClient + # Initializes a new instance of the class. 
+ # @param [String] instrumentation_key to identify which Application Insights + # application this data is for. + # @param [Channel::TelemetryChannel] telemetry_channel the optional telemetry + # channel to be used instead of constructing a default one. + def initialize(instrumentation_key = nil, telemetry_channel = nil) + @context = Channel::TelemetryContext.new + @context.instrumentation_key = instrumentation_key + @channel = telemetry_channel || Channel::TelemetryChannel.new + end + + # The context associated with this client. All data objects created by this + # client will be accompanied by this value. + # @return [Channel::TelemetryContext] the context instance. + attr_reader :context + + # The channel associated with this telemetry client. All data created by this + # client will be passed along with the {#context} object to + # {Channel::TelemetryChannel#write} + # @return [Channel::TelemetryChannel] the channel instance. + attr_reader :channel + + # Send information about the page viewed in the application (a web page for + # instance). + # @param [String] name the name of the page that was viewed. + # @param [String] url the URL of the page that was viewed. + # @param [Hash] options the options to create the + # {Channel::Contracts::PageViewData} object. + # @option options [Fixnum] :duration the duration of the page view in + # milliseconds. (defaults to: 0) + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. 
(defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_page_view(name, url, options={}) + data_attributes = { + :name => name || 'Null', + :url => url, + :duration => options[:duration], + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {} + } + data = Channel::Contracts::PageViewData.new data_attributes + self.channel.write(data, self.context) + end + + # Send information about a single exception that occurred in the application. + # @param [Exception] exception the exception that the client wants to send. + # @param [Hash] options the options to create the + # {Channel::Contracts::ExceptionData} object. + # @option options [String] :handled_at the type of exception + # (defaults to: 'UserCode') + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. (defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_exception(exception, options={}) + return unless exception.is_a? 
Exception
+
+ parsed_stack = []
+ if exception.backtrace
+ frame_pattern = /^(?<file>.*):(?<line>\d+)(\.|:in `((?<method>.*)'$))/
+
+ exception.backtrace.each_with_index do |frame, counter|
+ match = frame_pattern.match frame
+ stack_frame = Channel::Contracts::StackFrame.new(
+ :assembly => 'Unknown',
+ :file_name => match['file'],
+ :level => counter,
+ :line => match['line'],
+ :method => match['method']
+ )
+
+ parsed_stack << stack_frame
+ end
+ end
+
+ details = Channel::Contracts::ExceptionDetails.new(
+ :id => 1,
+ :outer_id => 0,
+ :type_name => exception.class.name,
+ :message => exception.message,
+ :has_full_stack => exception.backtrace != nil,
+ :stack => (exception.backtrace.join("\n") if exception.backtrace),
+ :parsed_stack => parsed_stack
+ )
+
+ data = Channel::Contracts::ExceptionData.new(
+ :exceptions => [details],
+ :properties => options[:properties] || {},
+ :measurements => options[:measurements] || {},
+ # Must initialize handled_at after properties because it's actually stored in properties
+ :handled_at => options.fetch(:handled_at, 'UserCode')
+ )
+
+ self.channel.write(data, self.context)
+ end
+
+ # Send information about a single event that has occurred in the context of
+ # the application.
+ # @param [String] name the data to associate to this event.
+ # @param [Hash] options the options to create the
+ # {Channel::Contracts::EventData} object.
+ # @option options [Hash] :properties the set of custom properties the client
+ # wants attached to this data item.
(defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_event(name, options={}) + data = Channel::Contracts::EventData.new( + :name => name || 'Null', + :properties => options[:properties] || {}, + :measurements => options[:measurements] || {} + ) + + self.channel.write(data, self.context) + end + + # Send information about a single metric data point that was captured for + # the application. + # @param [String] name the name of the metric that was captured. + # @param [Fixnum] value the value of the metric that was captured. + # @param [Hash] options the options to create the + # {Channel::Contracts::MetricData} object. + # @option options [Channel::Contracts::DataPointType] :type the type of the + # metric (defaults to: {Channel::Contracts::DataPointType::AGGREGATION}) + # @option options [Fixnum] :count the number of metrics that were aggregated + # into this data point (defaults to: 0) + # @option options [Fixnum] :min the minimum of all metrics collected that + # were aggregated into this data point (defaults to: 0) + # @option options [Fixnum] :max the maximum of all metrics collected that + # were aggregated into this data point (defaults to: 0) + # @option options [Fixnum] :std_dev the standard deviation of all metrics + # collected that were aggregated into this data point (defaults to: 0) + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. 
(defaults to: {}) + # @option options [Hash] :measurements the set of custom measurements the + # client wants to attach to this data item (defaults to: {}) + def track_metric(name, value, options={}) + data_point = Channel::Contracts::DataPoint.new( + :name => name || 'Null', + :value => value || 0, + :kind => options[:type] || Channel::Contracts::DataPointType::AGGREGATION, + :count => options[:count], + :min => options[:min], + :max => options[:max], + :std_dev => options[:std_dev] + ) + + data = Channel::Contracts::MetricData.new( + :metrics => [data_point], + :properties => options[:properties] || {} + ) + + self.channel.write(data, self.context) + end + + # Sends a single trace statement. + # @param [String] name the trace statement. + # @param [Channel::Contracts::SeverityLevel] severity_level the severity level. + # @param [Hash] options the options to create the + # {Channel::Contracts::EventData} object. + # @option options [Hash] :properties the set of custom properties the client + # wants attached to this data item. (defaults to: {}) + def track_trace(name, severity_level = nil, options={}) + data = Channel::Contracts::MessageData.new( + :message => name || 'Null', + :severity_level => severity_level || Channel::Contracts::SeverityLevel::INFORMATION, + :properties => options[:properties] || {} + ) + + self.channel.write(data, self.context) + end + + # Sends a single request. + # @param [String] id the unique identifier of the request. + # @param (String) start_time the start time of the request. + # @param [String] duration the duration to process the request. + # @param [String] response_code the response code of the request. + # @param [Boolean] success indicates whether the request succeeds or not. + # @param [Hash] options the options to create the + # {Channel::Contracts::RequestData} object. + # @option options [String] :name the name of the request. + # @option options [String] :http_method the http method used for the request. 
+ # @option options [String] :url the url of the request.
+ # @option options [Hash] :properties the set of custom properties the client
+ # wants attached to this data item. (defaults to: {})
+ # @option options [Hash] :measurements the set of custom measurements the
+ # client wants to attach to this data item (defaults to: {})
+ def track_request(id, start_time, duration, response_code, success, options={})
+ data = Channel::Contracts::RequestData.new(
+ :id => id || 'Null',
+ :duration => duration || '0:00:00:00.0000000',
+ :response_code => response_code || 200,
+ :success => success == nil ? true : success,
+ :name => options[:name],
+ :url => options[:url],
+ :properties => options[:properties] || {},
+ :measurements => options[:measurements] || {},
+ # Must initialize http_method after properties because it's actually stored in properties
+ :http_method => options[:http_method]
+ )
+
+ self.channel.write(data, self.context, start_time)
+ end
+
+ # Flushes data in the queue. Data in the queue will be sent either immediately
+ # irrespective of what sender is being used.
+ def flush
+ self.channel.flush
+ end
+ end
+end
diff --git a/source/code/plugin/lib/application_insights/unhandled_exception.rb b/source/code/plugin/lib/application_insights/unhandled_exception.rb
new file mode 100644
index 000000000..aa87b6f85
--- /dev/null
+++ b/source/code/plugin/lib/application_insights/unhandled_exception.rb
@@ -0,0 +1,49 @@
+require_relative 'telemetry_client'
+require_relative 'channel/telemetry_channel'
+require_relative 'channel/synchronous_queue'
+require_relative 'channel/synchronous_sender'
+
+include ApplicationInsights
+
+module ApplicationInsights
+ module UnhandledException
+ @sender = nil
+
+ # Auto collects unhandled exception and send to the Application Insights service.
+ # @param (string) instrumentation_key used to identify which Application
+ # Insights application this data is for.
+ # @example + # require 'application_insights' + # ApplicationInsights::UnhandledException.collect('') + # raise Exception, 'Boom!' + def self.collect(instrumentation_key) + at_exit do + # Avoid sending exception more than once if this method got invoked multiple times + send(instrumentation_key) unless @sender + end + end + + # @api private + # Send the last raised exception to the Application Insights service if + # telemetry_sender is not customized. + # @param (string) instrumentation_key used to identify which Application + # Insights application this data is for. + # @param (SenderBase) telemetry_sender used to send the last raised exception. + def self.send(instrumentation_key, telemetry_sender = nil) + if $! && !$!.is_a?(SystemExit) && !$!.is_a?(SignalException) + if telemetry_sender + @sender = telemetry_sender + elsif !@sender + # Use a synchronized sender to guarantee the data would be sent out once flush + @sender = Channel::SynchronousSender.new + end + + queue = Channel::SynchronousQueue.new @sender + channel = Channel::TelemetryChannel.new nil, queue + client = TelemetryClient.new instrumentation_key, channel + client.track_exception($!, handled_at: 'Unhandled') + client.flush + end + end + end +end diff --git a/source/code/plugin/lib/application_insights/version.rb b/source/code/plugin/lib/application_insights/version.rb new file mode 100644 index 000000000..d2d56e833 --- /dev/null +++ b/source/code/plugin/lib/application_insights/version.rb @@ -0,0 +1,3 @@ +module ApplicationInsights + VERSION = '0.5.7'.freeze +end From 6698fcd365328f31b7cbda6fec205cec1ef7933c Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 7 Nov 2018 16:21:53 -0800 Subject: [PATCH 30/88] Fix Telemetry Bug -- Initialize Telemetry Client after Initializing all required properties (#162) --- source/code/go/src/plugins/oms.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/code/go/src/plugins/oms.go 
b/source/code/go/src/plugins/oms.go index e0abaea1f..51a2bd47e 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -334,13 +334,6 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) - ret, err := InitializeTelemetryClient(agentVersion) - if ret != 0 || err != nil { - message := fmt.Sprintf("Error During Telemetry Initialization :%s", err.Error()) - fmt.Printf(message) - Log(message) - } - pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { message := fmt.Sprintf("Error Reading plugin config path : %s \n", err.Error()) @@ -398,6 +391,13 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Computer = strings.TrimSuffix(ToString(containerHostName), "\n") Log("Computer == %s \n", Computer) + ret, err := InitializeTelemetryClient(agentVersion) + if ret != 0 || err != nil { + message := fmt.Sprintf("Error During Telemetry Initialization :%s", err.Error()) + fmt.Printf(message) + Log(message) + } + // Initialize KubeAPI Client config, err := rest.InClusterConfig() if err != nil { From ad6bb933f64c7d32c3eb779d031327c76e12d2e4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Mon, 12 Nov 2018 11:45:57 -0800 Subject: [PATCH 31/88] Fix kube events memory leak due to yaml serialization for > 5k events (#163) --- source/code/plugin/in_kube_events.rb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 6a6ae9296..5df31df95 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -10,7 +10,6 @@ class Kube_Event_Input < Input def initialize super - require 'yaml' require 'json' require_relative 'KubernetesApiClient' @@ -62,6 +61,7 @@ def enumerate(eventList = nil) eventStream = MultiEventStream.new events['items'].each do |items| record = {} + # - Not sure if ingestion 
has the below mapping for this custom type. Fix it as part of fixed type conversion record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated eventId = items['metadata']['uid'] + "/" + items['count'].to_s newEventQueryState.push(eventId) @@ -86,7 +86,7 @@ def enumerate(eventList = nil) end record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId - eventStream.add(emitTime, record) if record + eventStream.add(emitTime, record) if record end router.emit_stream(@tag, eventStream) if eventStream end @@ -121,7 +121,10 @@ def getEventQueryState eventQueryState = [] begin if File.file?(@@KubeEventsStateFile) - eventQueryState = YAML.load_file(@@KubeEventsStateFile, []) + # Do not read the entire file in one shot as it spikes memory (50+MB) for ~5k events + File.foreach(@@KubeEventsStateFile) do |line| + eventQueryState.push(line.chomp) #puts will append newline which needs to be removed + end end rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s @@ -132,7 +135,12 @@ def getEventQueryState def writeEventQueryState(eventQueryState) begin - File.write(@@KubeEventsStateFile, eventQueryState.to_yaml) + if(!eventQueryState.nil? && !eventQueryState.empty?) 
+ # No need to close file handle (f) due to block scope + File.open(@@KubeEventsStateFile, "w") do |f| + f.puts(eventQueryState) + end + end rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) From eff92df54914482b91604b90622fd9fdf2d917eb Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 14 Nov 2018 15:48:23 -0800 Subject: [PATCH 32/88] Setting Timeout for HTTP Client in PostDataHelper in outoms go plugin(#164) --- source/code/go/src/plugins/utils.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/utils.go b/source/code/go/src/plugins/utils.go index 91e433a0f..85af80d7a 100644 --- a/source/code/go/src/plugins/utils.go +++ b/source/code/go/src/plugins/utils.go @@ -70,7 +70,10 @@ func CreateHTTPClient() { tlsConfig.BuildNameToCertificate() transport := &http.Transport{TLSClientConfig: tlsConfig} - HTTPClient = http.Client{Transport: transport} + HTTPClient = http.Client{ + Transport: transport, + Timeout: 30 * time.Second, + } Log("Successfully created HTTP Client") } From 9893e36d3aeb6a05259a45d449ad2b04453418ea Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 15 Nov 2018 17:01:18 -0800 Subject: [PATCH 33/88] Vishwa/perftelemetry 2 (#165) * add cpu usage telemetry for ds & rs * add cpu & memory usage telemetry for ds & rs --- .../code/plugin/ApplicationInsightsUtility.rb | 32 ++++++++++++ .../code/plugin/CAdvisorMetricsAPIClient.rb | 51 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 14fc9f2f8..78553a83f 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -5,6 +5,7 @@ class ApplicationInsightsUtility require_relative 'lib/application_insights' require_relative 'omslog' require_relative 'DockerApiClient' + require_relative 'oms_common' require 'json' 
require 'base64' @@ -20,6 +21,7 @@ class ApplicationInsightsUtility @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' @@CustomProperties = {} @@Tc = nil + @@hostName = (OMS::Common.get_hostname) def initialize end @@ -124,6 +126,36 @@ def sendTelemetry(pluginName, properties) end end + #Method to send metric. It will merge passed-in properties with common custom properties + def sendMetricTelemetry(metricName, metricValue, properties) + begin + if (metricName.empty? || metricName.nil?) + $log.warn("SendMetricTelemetry: metricName is missing") + return + end + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility + end + telemetryProps = {} + telemetryProps["Computer"] = @@hostName + # add common dimensions + @@CustomProperties.each{ |k,v| telemetryProps[k]=v} + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each{ |k,v| telemetryProps[k]=v} + end + if !(@@Tc.nil?) + @@Tc.track_metric metricName, metricValue, + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights metric Telemetry #{metricName} sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") + end + end + def getWorkspaceId() begin adminConf = {} diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index c10cbad4a..9e47e5a9e 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -12,6 +12,7 @@ class CAdvisorMetricsAPIClient require_relative 'oms_common' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M @@ -19,6 +20,8 @@ class CAdvisorMetricsAPIClient 
@@rxBytesTimeLast = nil @@txBytesLast = nil @@txBytesTimeLast = nil + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i def initialize end @@ -97,10 +100,15 @@ def getMetrics() def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) metricItems = [] clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 begin metricInfo = metricJSON metricInfo['pods'].each do |pod| podUid = pod['podRef']['uid'] + podName = pod['podRef']['name'] + podNamespace = pod['podRef']['namespace'] + if (!pod['containers'].nil?) pod['containers'].each do |container| #cpu metric @@ -124,9 +132,29 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met metricProps['Collections'].push(metricCollections) metricItem['DataItems'].push(metricProps) metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. 
so would need more work to cache as pv & use + if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("cpuUsageNanoCores")) + + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps['PodName'] = podName + telemetryProps['ContainerName'] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end + end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") + end end end end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("cpuUsageNanoCores")) + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + end rescue => error @Log.warn("getcontainerCpuMetricItems failed: #{error} for metric #{cpuMetricNameToCollect}") return metricItems @@ -137,10 +165,14 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) metricItems = [] clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 begin metricInfo = metricJSON metricInfo['pods'].each do |pod| podUid = pod['podRef']['uid'] + podName = pod['podRef']['name'] + podNamespace = pod['podRef']['namespace'] if (!pod['containers'].nil?) 
pod['containers'].each do |container| containerName = container['name'] @@ -164,9 +196,28 @@ def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollec metricProps['Collections'].push(metricCollections) metricItem['DataItems'].push(metricProps) metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. so would need more work to cache as pv & use + if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("memoryRssBytes")) + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps['PodName'] = podName + telemetryProps['ContainerName'] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end + end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerMemoryMetricItems failed: #{errorStr} for metric #{memoryMetricNameToCollect}") + end end end end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("memoryRssBytes")) + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + end rescue => error @Log.warn("getcontainerMemoryMetricItems failed: #{error} for metric #{memoryMetricNameToCollect}") @Log.warn metricJSON From 4f3c8988e4d1a989f8e9ab0e897443f1f4a94563 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 27 Nov 2018 10:39:41 -0800 Subject: [PATCH 34/88] environment variable fix (#166) * environment variable fix * updating agent version --- installer/conf/td-agent-bit.conf | 2 +- source/code/plugin/in_containerinventory.rb | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf 
b/installer/conf/td-agent-bit.conf index 2a6199987..fe174f9a5 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,4 +28,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod10162018-2 + AgentVersion internaltest1126 diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index 43811e1e1..f501421a2 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -56,6 +56,11 @@ def obtainContainerConfig(instance, container) envValue = configValue['Env'] envValueString = (envValue.nil?) ? "" : envValue.to_s + # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE + if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) + envValueString = ["AZMON_COLLECT_ENV=FALSE"] + $log.warn("Environment Variable collection for container: #{container['Id']} skipped because AZMON_COLLECT_ENV is set to false") + end # Restricting the ENV string value to 200kb since the size of this string can go very high if envValueString.length > 200000 envValueStringTruncated = envValueString.slice(0..200000) From 5e16467696df96d59d32d7219b901c1450b44201 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 27 Nov 2018 11:20:51 -0800 Subject: [PATCH 35/88] Fixing a bug where we were crashing due to container statuses not present when not was lost (#167) --- source/code/plugin/in_kube_podinventory.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 2cd1e1bc3..ec76bac61 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -101,7 +101,7 @@ def parse_and_emit_records(podInventory, serviceList) #podStatus # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running 
podReadyCondition = true - if !items['status']['reason'].nil? && items['status']['reason'] == "NodeLost" + if !items['status']['reason'].nil? && items['status']['reason'] == "NodeLost" && !items['status']['conditions'].nil? items['status']['conditions'].each do |condition| if condition['type'] == "Ready" && condition['status'] == "False" podReadyCondition = false From b482b1ecb667d4f75cd3902c5baf6debd25990ef Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 28 Nov 2018 17:37:41 -0800 Subject: [PATCH 36/88] Updating title --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0c543e716..8755cedb3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AKS Container Health monitoring +# Azure Monitor for Containers ## Code of Conduct @@ -40,4 +40,4 @@ additional questions or comments. - Kubernetes RBAC enablement - Latest released omsagent (1.6.0-42) - Bug fix so that we do not collect kube-system namespace container logs when kube api calls fail occasionally (Bug #215107) -- .yaml changes (for RBAC) \ No newline at end of file +- .yaml changes (for RBAC) From d75ba897b9ccd58a4ad8a049b87b09a990ea7934 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 28 Nov 2018 17:40:41 -0800 Subject: [PATCH 37/88] updating right versions for last release --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8755cedb3..ace2ff57b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ additional questions or comments. 
## Release History -### 10/16/2018 - Version microsoft/oms:ciprod10162018 +### 10/16/2018 - Version microsoft/oms:ciprod10162018-2 - Fix for containerID being 00000-00000-00000 - Move from fluentD to fluentbit for container log collection - Seg fault fixes in json parsing for container inventory & container image inventory From cbd815c90bea4f7878eb6c0908f3d0456737dbd5 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Thu, 29 Nov 2018 11:25:15 -0800 Subject: [PATCH 38/88] Updating the break condition to look for end of response (#168) * Updating the break condition to look for end of response * changes for docker response --- source/code/plugin/DockerApiClient.rb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index b93411980..e12ef13ec 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -19,7 +19,7 @@ def initialize class << self # Make docker socket call for requests - def getResponse(request, isMultiJson) + def getResponse(request, isMultiJson, isVersion) begin socket = UNIXSocket.new(@@SocketPath) dockerResponse = "" @@ -36,8 +36,9 @@ def getResponse(request, isMultiJson) rescue Timeout::Error $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}") isTimeOut = true + break end - break if responseChunk.length < @@ChunkSize + break if (isVersion)? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n") end socket.close return (isTimeOut)? 
nil : parseResponse(dockerResponse, isMultiJson) @@ -71,7 +72,7 @@ def parseResponse(dockerResponse, isMultiJson) def getDockerHostName() dockerHostName = "" request = DockerApiRestHelper.restDockerInfo - response = getResponse(request, false) + response = getResponse(request, false, false) if (response != nil) dockerHostName = response['Name'] end @@ -81,7 +82,7 @@ def getDockerHostName() def listContainers() ids = [] request = DockerApiRestHelper.restDockerPs - containers = getResponse(request, true) + containers = getResponse(request, true, false) if !containers.nil? && !containers.empty? containers.each do |container| ids.push(container['Id']) @@ -121,7 +122,7 @@ def getImageIdMap() result = nil begin request = DockerApiRestHelper.restDockerImages - images = getResponse(request, true) + images = getResponse(request, true, false) if !images.nil? && !images.empty? result = {} images.each do |image| @@ -144,13 +145,13 @@ def getImageIdMap() def dockerInspectContainer(id) request = DockerApiRestHelper.restDockerInspect(id) - return getResponse(request, false) + return getResponse(request, false, false) end # This method returns docker version and docker api version for telemetry def dockerInfo() request = DockerApiRestHelper.restDockerVersion - response = getResponse(request, false) + response = getResponse(request, false, true) dockerInfo = {} if (response != nil) dockerInfo['Version'] = response['Version'] @@ -159,4 +160,4 @@ def dockerInfo() return dockerInfo end end -end \ No newline at end of file +end From d0d5bf78798e3d90655fc08f8a1666daa30c47d3 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 29 Nov 2018 12:01:11 -0800 Subject: [PATCH 39/88] updating AgentVersion for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index fe174f9a5..c92bcdf07 100644 --- a/installer/conf/td-agent-bit.conf +++ 
b/installer/conf/td-agent-bit.conf @@ -28,4 +28,4 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion internaltest1126 + AgentVersion ciprod11292018 From bfe27e5c6f7c3a97dc98f9e7296f25ea2c1d5a36 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 29 Nov 2018 12:16:35 -0800 Subject: [PATCH 40/88] Updating readme for latest release changes --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index ace2ff57b..17a3cf3ad 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,22 @@ additional questions or comments. ## Release History +### 11/29/2018 - Version microsoft/oms:ciprod11292018 +- Disable Container Image inventory workflow +- Kube_Events memory leak fix for replica-set +- Timeout (30 secs) for outOMS +- Reduce critical lock duration for quicker log processing (for log enrichment) +- Disable OMI based Container Inventory workflow to fluentD based Container Inventory +- Moby support for the new Container Inventory workflow +- Ability to disable environment variables collection by individual container +- Bugfix - No inventory data due to container status(es) not available +- Agent telemetry cpu usage & memory usage (for DaemonSet and ReplicaSet) +- Agent telemetry - log generation rate +- Agent telemetry - container count per node +- Agent telemetry - collect container logs from agent (DaemonSet and ReplicaSet) as AI trace +- Agent telemetry - errors/exceptions for Container Inventory workflow +- Agent telemetry - Container Inventory Heartbeat + ### 10/16/2018 - Version microsoft/oms:ciprod10162018-2 - Fix for containerID being 00000-00000-00000 - Move from fluentD to fluentbit for container log collection From a621f883b0059db69ea1c2df48eef9671bc07b7e Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Sun, 16 Dec 2018 20:17:56 -0800 Subject: [PATCH 41/88] Changes - (#173) * use /var/log for state * new metric ContainerLogsAgentSideLatencyMs * new field 'timeOfComand' --- 
installer/conf/td-agent-bit.conf | 2 +- source/code/go/src/plugins/oms.go | 43 ++++++++++++++++++------- source/code/go/src/plugins/telemetry.go | 12 +++++++ 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index c3252a185..b6b9bcc44 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -8,7 +8,7 @@ Name tail Tag oms.container.log.* Path /var/log/containers/*.log - DB /var/opt/microsoft/docker-cimprov/state/fblogs.db + DB /var/log/omsagent-fblogs.db Parser docker Mem_Buf_Limit 30m Path_Key filepath diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 9876acc42..30e844915 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -77,9 +77,10 @@ var ( // DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type DataItem struct { - LogEntry string `json:"LogEntry"` - LogEntrySource string `json:"LogEntrySource"` - LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + LogEntry string `json:"LogEntry"` + LogEntrySource string `json:"LogEntrySource"` + LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + LogEntryTimeOfCommand string `json:"TimeOfCommand"` ID string `json:"Id"` Image string `json:"Image"` Name string `json:"Name"` @@ -204,6 +205,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { start := time.Now() var dataItems []DataItem + var maxLatency float64 + var maxLatencyContainer string ignoreIDSet := make(map[string]bool) imageIDMap := make(map[string]string) nameIDMap := make(map[string]string) @@ -248,18 +251,32 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { Log("ContainerId %s not present in Map ", containerID) } + dataItem := DataItem{ - ID: stringMap["Id"], - LogEntry: stringMap["LogEntry"], - LogEntrySource: stringMap["LogEntrySource"], - LogEntryTimeStamp: 
stringMap["LogEntryTimeStamp"], - SourceSystem: stringMap["SourceSystem"], - Computer: Computer, - Image: stringMap["Image"], - Name: stringMap["Name"], + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + LogEntryTimeOfCommand: start.Format(time.RFC3339), + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], } dataItems = append(dataItems, dataItem) + loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) + if e!= nil { + message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) + Log(message) + SendException(message) + } else { + ltncy := float64(start.Sub(loggedTime) / time.Millisecond) + if ltncy >= maxLatency { + maxLatency = ltncy + maxLatencyContainer = dataItem.Name + "=" + dataItem.ID + } + } } if len(dataItems) > 0 { @@ -302,6 +319,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { ContainerLogTelemetryMutex.Lock() FlushedRecordsCount += float64(numRecords) FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) + if maxLatency >= AgentLogProcessingMaxLatencyMs { + AgentLogProcessingMaxLatencyMs = maxLatency + AgentLogProcessingMaxLatencyMsContainer = maxLatencyContainer + } ContainerLogTelemetryMutex.Unlock() } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 5952ac9ac..0d5513362 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -17,6 +17,10 @@ var ( FlushedRecordsCount float64 // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period FlushedRecordsTimeTaken float64 + // This is telemetry for how old/latent logs we are processing in milliseconds (max over a period of time) + AgentLogProcessingMaxLatencyMs float64 + // This is telemetry 
for which container logs were latent (max over a period of time) + AgentLogProcessingMaxLatencyMsContainer string // CommonProperties indicates the dimensions that are sent with every event/metric CommonProperties map[string]string // TelemetryClient is the client used to send the telemetry @@ -35,6 +39,7 @@ const ( envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" + metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" defaultTelemetryPushIntervalSeconds = 300 eventNameContainerLogInit = "ContainerLogPluginInitialized" @@ -62,12 +67,19 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { logRate := FlushedRecordsCount / float64(elapsed/time.Second) FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 + logLatencyMs := AgentLogProcessingMaxLatencyMs + logLatencyMsContainer := AgentLogProcessingMaxLatencyMsContainer + AgentLogProcessingMaxLatencyMs = 0 + AgentLogProcessingMaxLatencyMsContainer = "" ContainerLogTelemetryMutex.Unlock() flushRateMetric := appinsights.NewMetricTelemetry(metricNameAvgFlushRate, flushRate) TelemetryClient.Track(flushRateMetric) logRateMetric := appinsights.NewMetricTelemetry(metricNameAvgLogGenerationRate, logRate) TelemetryClient.Track(logRateMetric) + logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) + logLatencyMetric.Properties["Container"] = logLatencyMsContainer + TelemetryClient.Track(logLatencyMetric) start = time.Now() } } From c9cf4fd7e5b3176136b47390ba405ee6afd6719b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 17 Dec 2018 13:58:09 -0800 Subject: [PATCH 42/88] Rashmi/kubenodeinventory (#174) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references 
as controller id --- installer/conf/container.conf | 23 ---------- installer/conf/kube.conf | 13 ++++++ .../code/plugin/ApplicationInsightsUtility.rb | 6 +-- source/code/plugin/in_kube_nodes.rb | 45 ++++++++++++++++--- source/code/plugin/in_kube_podinventory.rb | 20 +++++++++ 5 files changed, 76 insertions(+), 31 deletions(-) diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 798bd8eb6..091753230 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -15,16 +15,6 @@ log_level debug -# Container host inventory - - type omi - run_interval 60s - tag oms.api.ContainerNodeInventory - items [ - ["root/cimv2","Container_HostInventory"] - ] - - #cadvisor perf type cadvisorperf @@ -33,19 +23,6 @@ log_level debug - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - - type out_oms log_level debug diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 94fe2ef0b..22c51ad0e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -118,6 +118,19 @@ max_retry_wait 9m + + type out_oms_api + log_level debug + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 15s + max_retry_wait 9m + + type out_oms log_level debug diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 78553a83f..76e0b2926 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -83,7 +83,7 @@ def sendHeartBeatEvent(pluginName) end end - def sendCustomEvent(pluginName, properties) + def sendCustomMetric(pluginName, properties) begin if !(@@Tc.nil?) 
@@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], @@ -93,7 +93,7 @@ def sendCustomEvent(pluginName, properties) $log.info("AppInsights Container Count Telemetry sent successfully") end rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") end end @@ -120,7 +120,7 @@ def sendTelemetry(pluginName, properties) end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) - sendCustomEvent(pluginName, properties) + sendCustomMetric(pluginName, properties) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index edbbdd37f..1c792d0da 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -6,12 +6,15 @@ module Fluent class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) + @@ContainerNodeInventoryTag = 'oms.api.ContainerNodeInventory' + def initialize super require 'yaml' require 'json' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' end @@ -29,6 +32,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -46,15 +50,22 @@ def enumerate currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + telemetrySent = false + 
$log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) + $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") begin if(!nodeInventory.empty?) eventStream = MultiEventStream.new + containerNodeInventoryEventStream = MultiEventStream.new #get node inventory nodeInventory['items'].each do |items| record = {} + # Sending records for ContainerNodeInventory + containerNodeInventoryRecord = {} + containerNodeInventoryRecord['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord['Computer'] = items['metadata']['name'] + record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Computer'] = items['metadata']['name'] record['ClusterName'] = KubernetesApiClient.getClusterName @@ -89,16 +100,40 @@ def enumerate end - record['KubeletVersion'] = items['status']['nodeInfo']['kubeletVersion'] - record['KubeProxyVersion'] = items['status']['nodeInfo']['kubeProxyVersion'] + nodeInfo = items['status']['nodeInfo'] + record['KubeletVersion'] = nodeInfo['kubeletVersion'] + record['KubeProxyVersion'] = nodeInfo['kubeProxyVersion'] + containerNodeInventoryRecord['OperatingSystem'] = nodeInfo['osImage'] + dockerVersion = nodeInfo['containerRuntimeVersion'] + dockerVersion.slice! "docker://" + containerNodeInventoryRecord['DockerVersion'] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. 
+ containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord + wrapper = { "DataType"=>"KUBE_NODE_INVENTORY_BLOB", "IPName"=>"ContainerInsights", "DataItems"=>[record.each{|k,v| record[k]=v}] } eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + properties = {} + properties["Computer"] = record["Computer"] + ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + capacityInfo = items['status']['capacity'] + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) + telemetrySent = true + end end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + if telemetrySent == true + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index ec76bac61..c6873e8fe 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -10,8 +10,10 @@ def initialize super require 'yaml' require 'json' + require 'set' require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' end @@ -29,6 +31,7 @@ def start @condition = ConditionVariable.new @mutex = Mutex.new @thread = Thread.new(&method(:run_periodic)) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end end @@ -71,6 +74,8 @@ def parse_and_emit_records(podInventory, serviceList) emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 eventStream = MultiEventStream.new + controllerSet = Set.new [] + telemetryFlush = false begin #begin block start podInventory['items'].each do |items| #podInventory block start records = [] @@ -78,6 +83,7 @@ def parse_and_emit_records(podInventory, serviceList) record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated record['Name'] = items['metadata']['name'] podNameSpace = items['metadata']['namespace'] + if podNameSpace.eql?("kube-system") && !items['metadata'].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash @@ -129,9 +135,18 @@ def parse_and_emit_records(podInventory, serviceList) record['ClusterId'] = KubernetesApiClient.getClusterId record['ClusterName'] = KubernetesApiClient.getClusterName record['ServiceName'] = getServiceNameFromLabels(items['metadata']['namespace'], items['metadata']['labels'], serviceList) + # Adding telemetry to send pod telemetry every 5 minutes + 
timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference/60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end if !items['metadata']['ownerReferences'].nil? record['ControllerKind'] = items['metadata']['ownerReferences'][0]['kind'] record['ControllerName'] = items['metadata']['ownerReferences'][0]['name'] + if telemetryFlush == true + controllerSet.add(record['ControllerKind'] + record['ControllerName']) + end end podRestartCount = 0 record['PodRestartCount'] = 0 @@ -191,6 +206,11 @@ def parse_and_emit_records(podInventory, serviceList) end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream + if telemetryFlush == true + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + @@podTelemetryTimeTracker = DateTime.now.to_time.to_i + end @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") From df6f1228a4649df3fb1bae1c9ea02f22daca8efd Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 20 Dec 2018 15:27:18 -0800 Subject: [PATCH 43/88] Get cpuusage from usageseconds (#175) --- .../code/plugin/CAdvisorMetricsAPIClient.rb | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 9e47e5a9e..03d6f89f5 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -20,8 +20,11 @@ class CAdvisorMetricsAPIClient @@rxBytesTimeLast = nil @@txBytesLast = nil @@txBytesTimeLast = nil + @@nodeCpuUsageNanoSecondsLast = nil + @@nodeCpuUsageNanoSecondsTimeLast = nil @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + def initialize end @@ -73,7 +76,7 @@ def getMetrics() metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "cpu", "usageNanoCores", "cpuUsageNanoCores")) + metricDataItems.push(getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) @@ -274,24 +277,41 @@ def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToColl metricValue = node[metricCategory][metricNameToCollect] metricTime = 
node[metricCategory]['time'] - if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" ) - @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes & txBytes and not for #{metricNameToCollect}") + if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds" ) + @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") return nil elsif metricNameToCollect == "rxBytes" - if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? + if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? || @@rxBytesLast > metricValue #when kubelet is restarted the last condition will be true @@rxBytesLast = metricValue @@rxBytesTimeLast = metricTime return nil else - metricValue = ((metricValue - @@rxBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) + metricRateValue = ((metricValue - @@rxBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) + @@rxBytesLast = metricValue + @@rxBytesTimeLast = metricTime + metricValue = metricRateValue end - else - if @@txBytesLast.nil? || @@txBytesTimeLast.nil? + elsif metricNameToCollect == "txBytes" + if @@txBytesLast.nil? || @@txBytesTimeLast.nil? || @@txBytesLast > metricValue #when kubelet is restarted the last condition will be true @@txBytesLast = metricValue @@txBytesTimeLast = metricTime return nil else - metricValue = ((metricValue - @@txBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) + metricRateValue = ((metricValue - @@txBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) + @@txBytesLast = metricValue + @@txBytesTimeLast = metricTime + metricValue = metricRateValue + end + else + if @@nodeCpuUsageNanoSecondsLast.nil? || @@nodeCpuUsageNanoSecondsTimeLast.nil? 
|| @@nodeCpuUsageNanoSecondsLast > metricValue #when kubelet is restarted the last condition will be true + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + return nil + else + metricRateValue = ((metricValue - @@nodeCpuUsageNanoSecondsLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@nodeCpuUsageNanoSecondsTimeLast).to_time) + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + metricValue = metricRateValue end end From dac99311485f2600f9a1fd7b6c48470ada40e8ef Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 21 Dec 2018 10:46:56 -0800 Subject: [PATCH 44/88] Rashmi/kubenodeinventory (#176) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs --- .../code/plugin/ApplicationInsightsUtility.rb | 28 +++++++++++++------ source/code/plugin/DockerApiClient.rb | 3 +- source/code/plugin/in_containerinventory.rb | 1 + source/code/plugin/in_kube_events.rb | 6 ++++ source/code/plugin/in_kube_nodes.rb | 3 ++ source/code/plugin/in_kube_podinventory.rb | 6 ++++ source/code/plugin/in_kube_services.rb | 4 +++ 7 files changed, 42 insertions(+), 9 deletions(-) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 76e0b2926..2b2db673b 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -13,12 +13,12 @@ class ApplicationInsightsUtility @@Exception = 'ExceptionEvent' @@AcsClusterType = 'ACS' @@AksClusterType = 'AKS' - @@DaemonsetControllerType = 'DaemonSet' 
@OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' @@EnvAksRegion = 'AKS_REGION' @@EnvAgentVersion = 'AGENT_VERSION' @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' + @@EnvControllerType = 'CONTROLLER_TYPE' @@CustomProperties = {} @@Tc = nil @@hostName = (OMS::Common.get_hostname) @@ -54,12 +54,10 @@ def initializeUtility() @@CustomProperties["ClusterName"] = clusterName @@CustomProperties["Region"] = ENV[@@EnvAksRegion] end - @@CustomProperties['ControllerType'] = @@DaemonsetControllerType - dockerInfo = DockerApiClient.dockerInfo - @@CustomProperties['DockerVersion'] = dockerInfo['Version'] - @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + getDockerInfo() @@CustomProperties['WorkspaceID'] = getWorkspaceId @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] + @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] if !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) @@ -70,6 +68,14 @@ def initializeUtility() end end + def getDockerInfo() + dockerInfo = DockerApiClient.dockerInfo + if (!dockerInfo.nil? && !dockerInfo.empty?) + @@CustomProperties['DockerVersion'] = dockerInfo['Version'] + @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] + end + end + def sendHeartBeatEvent(pluginName) begin eventName = pluginName + @@HeartBeat @@ -100,7 +106,9 @@ def sendCustomMetric(pluginName, properties) def sendExceptionTelemetry(errorStr) begin if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end if !(@@Tc.nil?) @@Tc.track_exception errorStr , :properties => @@CustomProperties @@ -116,7 +124,9 @@ def sendExceptionTelemetry(errorStr) def sendTelemetry(pluginName, properties) begin if @@CustomProperties.empty? || @@CustomProperties.nil? 
- initializeUtility + initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) @@ -134,7 +144,9 @@ def sendMetricTelemetry(metricName, metricValue, properties) return end if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility + initializeUtility() + elsif @@CustomProperties['DockerVersion'].nil? + getDockerInfo() end telemetryProps = {} telemetryProps["Computer"] = @@hostName diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index e12ef13ec..903256f6d 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -10,10 +10,11 @@ class DockerApiClient require_relative 'DockerApiRestHelper' require_relative 'ApplicationInsightsUtility' - @@SocketPath = "/var/run/docker.sock" + @@SocketPath = "/var/run/host/docker.sock" @@ChunkSize = 4096 @@TimeoutInSeconds = 5 @@PluginName = 'ContainerInventory' + def initialize end diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index f501421a2..a38697741 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -19,6 +19,7 @@ def initialize require_relative 'ContainerInventoryState' require_relative 'ApplicationInsightsUtility' require_relative 'omslog' + end config_param :run_interval, :time, :default => '1m' diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 5df31df95..b7be24510 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -15,6 +15,8 @@ def initialize require_relative 'KubernetesApiClient' require_relative 'oms_common' require_relative 'omslog' + require_relative 'ApplicationInsightsUtility' + end config_param :run_interval, :time, :default => '1m' @@ -94,6 +96,7 @@ def enumerate(eventList = nil) rescue => errorStr 
$log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -110,6 +113,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_events::run_periodic: enumerate Failed to retrieve kube events: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -129,6 +133,7 @@ def getEventQueryState rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return eventQueryState end @@ -144,6 +149,7 @@ def writeEventQueryState(eventQueryState) rescue => errorStr $log.warn $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 1c792d0da..85153b21c 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -17,6 +17,7 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' + end config_param :run_interval, :time, :default => '1m' @@ -142,6 +143,7 @@ def enumerate rescue => errorStr $log.warn "Failed to retrieve node inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -158,6 +160,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index c6873e8fe..eaf14b035 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -16,6 
+16,7 @@ def initialize require_relative 'ApplicationInsightsUtility' require_relative 'oms_common' require_relative 'omslog' + end config_param :run_interval, :time, :default => '1m' @@ -66,6 +67,7 @@ def enumerate(podList = nil) rescue => errorStr $log.warn "Failed in enumerate pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -207,6 +209,7 @@ def parse_and_emit_records(podInventory, serviceList) end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream if telemetryFlush == true + ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) @@podTelemetryTimeTracker = DateTime.now.to_time.to_i @@ -218,6 +221,7 @@ def parse_and_emit_records(podInventory, serviceList) rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end #begin block end end @@ -234,6 +238,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_podinventory::run_periodic: enumerate Failed to retrieve pod inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock @@ -268,6 +273,7 @@ def getServiceNameFromLabels(namespace, labels, serviceList) rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 9a33f4581..655beef59 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -14,6 +14,8 @@ def 
initialize require_relative 'KubernetesApiClient' require_relative 'oms_common' require_relative 'omslog' + require_relative 'ApplicationInsightsUtility' + end config_param :run_interval, :time, :default => '1m' @@ -70,6 +72,7 @@ def enumerate rescue => errorStr $log.warn line.dump, error: errorStr.to_s $log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @@ -86,6 +89,7 @@ def run_periodic enumerate rescue => errorStr $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end end @mutex.lock From 04cc1a87e64cae65ffeba3b061312dcb35959b51 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 26 Dec 2018 10:32:22 -0800 Subject: [PATCH 45/88] Rashmi/kubenodeinventory (#178) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type --- installer/conf/kube.conf | 19 ++++++++++--------- source/code/plugin/in_kube_events.rb | 9 +++++++-- source/code/plugin/in_kube_services.rb | 9 +++++++-- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 22c51ad0e..6331d257e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -11,7 +11,7 @@ #Kubernetes events type kubeevents - tag oms.api.KubeEvents.CollectionTime + tag oms.containerinsights.KubeEvents run_interval 60s log_level debug @@ -26,7 +26,7 @@ #Kubernetes services type kubeservices - tag oms.api.KubeServices.CollectionTime + tag 
oms.containerinsights.KubeServices run_interval 60s log_level debug @@ -62,18 +62,19 @@ max_retry_wait 9m - - type out_oms_api + + type out_oms log_level debug - num_threads 5 + num_threads 5 buffer_chunk_limit 5m buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubeevents*.buffer + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk + buffer_queue_full_action drop_oldest_chunk flush_interval 20s retry_limit 10 retry_wait 30s + max_retry_wait 9m @@ -88,8 +89,8 @@ retry_wait 30s - - type out_oms_api + + type out_oms log_level debug num_threads 5 buffer_chunk_limit 20m diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index b7be24510..309dd8034 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -20,7 +20,7 @@ def initialize end config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeEvents.CollectionTime" + config_param :tag, :string, :default => "oms.containerinsights.KubeEvents" def configure (conf) super @@ -88,7 +88,12 @@ def enumerate(eventList = nil) end record['ClusterName'] = KubernetesApiClient.getClusterName record['ClusterId'] = KubernetesApiClient.getClusterId - eventStream.add(emitTime, record) if record + wrapper = { + "DataType"=>"KUBE_EVENTS_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream end diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 655beef59..e1bb93f30 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -19,7 +19,7 @@ def initialize end config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.KubeServices.CollectionTime" + config_param :tag, 
:string, :default => "oms.containerinsights.KubeServices" def configure (conf) super @@ -65,7 +65,12 @@ def enumerate record['ClusterIP'] = items['spec']['clusterIP'] record['ServiceType'] = items['spec']['type'] # : Add ports and status fields - eventStream.add(emitTime, record) if record + wrapper = { + "DataType"=>"KUBE_SERVICES_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[record.each{|k,v| record[k]=v}] + } + eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream end From 5883f5368cc9704879b25a145fec80906d91d826 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 26 Dec 2018 13:36:48 -0800 Subject: [PATCH 46/88] Fixing an issue on the cpurate metric, which happens for the first time (when cache is empty) (#179) --- source/code/plugin/CAdvisorMetricsAPIClient.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 03d6f89f5..97eec06ab 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -76,7 +76,10 @@ def getMetrics() metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) - metricDataItems.push(getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores")) + cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores") + if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? 
+ metricDataItems.push(cpuUsageNanoSecondsRate) + end metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) From 191f3285dad2065f83b57b4b3e55fad6709b15ab Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 28 Dec 2018 12:27:46 -0800 Subject: [PATCH 47/88] Rashmi/kubenodeinventory (#180) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension --- source/code/plugin/in_kube_nodes.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 85153b21c..a6908fc99 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -123,7 +123,7 @@ def enumerate if (timeDifferenceInMinutes >= 5) properties = {} properties["Computer"] = record["Computer"] - ApplicationInsightsUtility.sendMetricTelemetry("KubeletVersion", record["KubeletVersion"] , properties) + properties["KubeletVersion"] = record["KubeletVersion"] capacityInfo = items['status']['capacity'] ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) From 7e52e8c5553bda70dd33a4afccbcb134657b42be Mon Sep 17 
00:00:00 2001 From: rashmichandrashekar Date: Mon, 7 Jan 2019 15:44:25 -0800 Subject: [PATCH 48/88] Exclude docker containers from container inventory (#181) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive --- source/code/plugin/DockerApiClient.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index 903256f6d..d04bf0589 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -86,7 +86,15 @@ def listContainers() containers = getResponse(request, true, false) if !containers.nil? && !containers.empty? containers.each do |container| - ids.push(container['Id']) + labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] + if !labels.nil? + labelKeys = labels.keys + #Case insensitive lookup for pod uid label + keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} + if !labels[keyValue].nil? 
+ ids.push(container['Id']) + end + end end end return ids From f0591f9e70056c61269f3a961906a908845a1cdd Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Tue, 8 Jan 2019 15:10:41 -0800 Subject: [PATCH 49/88] Exclude pauseamd64 containers from container inventory (#182) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * changes to exclude pause amd 64 containers --- source/code/plugin/DockerApiClient.rb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb index d04bf0589..5a46b5fdb 100644 --- a/source/code/plugin/DockerApiClient.rb +++ b/source/code/plugin/DockerApiClient.rb @@ -89,10 +89,18 @@ def listContainers() labels = (!container['Labels'].nil?)? container['Labels'] : container['labels'] if !labels.nil? labelKeys = labels.keys - #Case insensitive lookup for pod uid label - keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} - if !labels[keyValue].nil? - ids.push(container['Id']) + dockerTypeLabel = labelKeys.find {|k| 'io.kubernetes.docker.type'.downcase == k.downcase} + if !dockerTypeLabel.nil? 
+ dockerTypeLabelValue = labels[dockerTypeLabel] + # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers + if !(dockerTypeLabelValue.downcase == "podsandbox".downcase) + # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that + # are created in the pods for ContainerInventory + keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase} + if !labels[keyValue].nil? + ids.push(container['Id']) + end + end end end end From 4782435a228c3626b25d8bf1682a0d977e79eb23 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 9 Jan 2019 11:22:53 -0800 Subject: [PATCH 50/88] Update agent version --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index b6b9bcc44..29c98bdf1 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod11292018 + AgentVersion ciprod01092019 From 23bcc4198c3ead32fb0404afeaddac83b3c23b78 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 9 Jan 2019 13:19:06 -0800 Subject: [PATCH 51/88] Updating readme for the latest release --- README.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fc43d6605..5c65308fb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,29 @@ additional questions or comments. 
## Release History Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) - + +### 10/09/2018 - Version microsoft/oms:ciprod01092019 +- Omsagent - 1.8.1.256 (nov 2018 release) +- Persist fluentbit state between container restarts +- Populate 'TimeOfCommand' for agent ingest time for container logs +- Get node cpu usage from cpuusagenanoseconds (and convert to cpuusgaenanocores) +- Container Node Inventory - move to fluentD from OMI +- Mount docker.sock (Daemon set) as /var/run/host +- Liveness probe (Daemon set) - check for omsagent user permissions in docker.sock and update as necessary (required when docker daemon gets restarted) +- Move to fixed type for kubeevents & kubeservices +- Disable collecting ENV for our oms agent container (daemonset & replicaset) +- Disable container inventory collection for 'sandbox' containers & non kubernetes managed containers +- Agent telemetry - ContainerLogsAgentSideLatencyMs +- Agent telemetry - PodCount +- Agent telemetry - ControllerCount +- Agent telemetry - K8S Version +- Agent telemetry - NodeCoreCapacity +- Agent telemetry - NodeMemoryCapacity +- Agent telemetry - KubeEvents (exceptions) +- Agent telemetry - Kubenodes (exceptions) +- Agent telemetry - kubepods (exceptions) +- Agent telemetry - kubeservices (exceptions) +- Agent telemetry - Daemonset , Replicaset as dimensions (bug fix) ### 11/29/2018 - Version microsoft/oms:ciprod11292018 - Disable Container Image inventory workflow From 51d5e938d436584bc094d72361d8652dd51db8bd Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 11 Jan 2019 13:08:56 -0800 Subject: [PATCH 52/88] Fix indentation in kube.conf and update readme (#184) * containernodeinventory changes * changes for containernodeinventory * changes to add node telemetry * pod telemetry cahnges * updated telemetry changes * changes to get uid of owner references as controller id * updating socket to the new mount location * Adding exception 
telemetry and heartbeat * changes to fix controller type * Fixing typo * fixing method signature * updating plugins to get controller type from env * fixing bugs * changes to fixed type * removing comments * changes for fixed type * adding kubelet version as a dimension * Excluding raw docker containers from container inventory * making labels key case insensitive * make poduid label case insensitive * changes to exclude pause amd 64 containers * fixing indentation so that kube.conf contents can be used in config map in the yaml * updating readme to fix date and agent version --- README.md | 6 +- installer/conf/kube.conf | 270 +++++++++++++++++++-------------------- 2 files changed, 138 insertions(+), 138 deletions(-) diff --git a/README.md b/README.md index 5c65308fb..dd55f810e 100644 --- a/README.md +++ b/README.md @@ -11,21 +11,21 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 10/09/2018 - Version microsoft/oms:ciprod01092019 +### 01/09/2018 - Version microsoft/oms:ciprod01092019-2 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts - Populate 'TimeOfCommand' for agent ingest time for container logs - Get node cpu usage from cpuusagenanoseconds (and convert to cpuusgaenanocores) - Container Node Inventory - move to fluentD from OMI - Mount docker.sock (Daemon set) as /var/run/host -- Liveness probe (Daemon set) - check for omsagent user permissions in docker.sock and update as necessary (required when docker daemon gets restarted) +- Add omsagent user to docker group - Move to fixed type for kubeevents & kubeservices - Disable collecting ENV for our oms agent container (daemonset & replicaset) - Disable container inventory collection for 'sandbox' containers & non kubernetes managed containers - Agent telemetry - ContainerLogsAgentSideLatencyMs - Agent telemetry - PodCount - Agent telemetry - ControllerCount -- 
Agent telemetry - K8S Version +- Agent telemetry - K8S Version - Agent telemetry - NodeCoreCapacity - Agent telemetry - NodeMemoryCapacity - Agent telemetry - KubeEvents (exceptions) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 6331d257e..164865022 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -1,148 +1,148 @@ -# Fluentd config file for OMS Docker - cluster components (kubeAPI) + # Fluentd config file for OMS Docker - cluster components (kubeAPI) -#Kubernetes pod inventory - - type kubepodinventory - tag oms.containerinsights.KubePodInventory - run_interval 60s - log_level debug - + #Kubernetes pod inventory + + type kubepodinventory + tag oms.containerinsights.KubePodInventory + run_interval 60s + log_level debug + -#Kubernetes events - - type kubeevents - tag oms.containerinsights.KubeEvents - run_interval 60s - log_level debug - + #Kubernetes events + + type kubeevents + tag oms.containerinsights.KubeEvents + run_interval 60s + log_level debug + -#Kubernetes logs - - type kubelogs - tag oms.api.KubeLogs - run_interval 60s - + #Kubernetes logs + + type kubelogs + tag oms.api.KubeLogs + run_interval 60s + -#Kubernetes services - - type kubeservices - tag oms.containerinsights.KubeServices - run_interval 60s - log_level debug - + #Kubernetes services + + type kubeservices + tag oms.containerinsights.KubeServices + run_interval 60s + log_level debug + -#Kubernetes Nodes - - type kubenodeinventory - tag oms.containerinsights.KubeNodeInventory - run_interval 60s - log_level debug - + #Kubernetes Nodes + + type kubenodeinventory + tag oms.containerinsights.KubeNodeInventory + run_interval 60s + log_level debug + -#Kubernetes perf - - type kubeperf - tag oms.api.KubePerf - run_interval 60s - log_level debug - + #Kubernetes perf + + type kubeperf + tag oms.api.KubePerf + run_interval 60s + log_level debug + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path 
%STATE_DIR_WS%/out_oms_kubepods*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 5m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer - buffer_queue_limit 10 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 5m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer + buffer_queue_limit 10 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms_api - log_level debug - buffer_chunk_limit 10m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer - buffer_queue_limit 10 - flush_interval 20s - retry_limit 10 - retry_wait 30s - + + type out_oms_api + log_level debug + buffer_chunk_limit 10m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer + buffer_queue_limit 10 + flush_interval 20s + retry_limit 10 + retry_wait 30s + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer + 
buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + - - type out_oms_api - log_level debug - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer - buffer_queue_limit 20 - flush_interval 20s - retry_limit 10 - retry_wait 15s - max_retry_wait 9m - + + type out_oms_api + log_level debug + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer + buffer_queue_limit 20 + flush_interval 20s + retry_limit 10 + retry_wait 15s + max_retry_wait 9m + - - type out_oms - log_level debug - num_threads 5 - buffer_chunk_limit 20m - buffer_type file - buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer - buffer_queue_limit 20 - buffer_queue_full_action drop_oldest_chunk - flush_interval 20s - retry_limit 10 - retry_wait 30s - max_retry_wait 9m - + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + \ No newline at end of file From decf86a3d24dece047ea4b780d10c799fbe1a1ce Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 11 Jan 2019 13:16:21 
-0800 Subject: [PATCH 53/88] updating agent tag --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dd55f810e..099a065e8 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 01/09/2018 - Version microsoft/oms:ciprod01092019-2 +### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts - Populate 'TimeOfCommand' for agent ingest time for container logs From a1b35db565c9cc324733534b90e3c4f5a98651d7 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 29 Jan 2019 15:33:59 -0800 Subject: [PATCH 54/88] Get Pods for current Node Only (#185) * Fix KubeAPI Calls to filter to get pods for current node * Reinstate log line --- source/code/go/src/plugins/oms.go | 48 ++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 5d9269d1e..49e91f87f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -77,15 +77,15 @@ var ( // DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin type DataItem struct { - LogEntry string `json:"LogEntry"` - LogEntrySource string `json:"LogEntrySource"` - LogEntryTimeStamp string `json:"LogEntryTimeStamp"` - LogEntryTimeOfCommand string `json:"TimeOfCommand"` - ID string `json:"Id"` - Image string `json:"Image"` - Name string `json:"Name"` - SourceSystem string `json:"SourceSystem"` - Computer string `json:"Computer"` + LogEntry string `json:"LogEntry"` + LogEntrySource string `json:"LogEntrySource"` + LogEntryTimeStamp string `json:"LogEntryTimeStamp"` + LogEntryTimeOfCommand string `json:"TimeOfCommand"` + ID string `json:"Id"` + Image string `json:"Image"` + 
Name string `json:"Name"` + SourceSystem string `json:"SourceSystem"` + Computer string `json:"Computer"` } // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point @@ -137,7 +137,10 @@ func updateContainerImageNameMaps() { _imageIDMap := make(map[string]string) _nameIDMap := make(map[string]string) - pods, err := ClientSet.CoreV1().Pods("").List(metav1.ListOptions{}) + listOptions := metav1.ListOptions{} + listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) + pods, err := ClientSet.CoreV1().Pods("").List(listOptions) + if err != nil { message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) Log(message) @@ -244,31 +247,30 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val } else { - Log("ContainerId %s not present in Map ", containerID) + Log("ContainerId %s not present in Name Map ", containerID) } if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val } else { - Log("ContainerId %s not present in Map ", containerID) + Log("ContainerId %s not present in Image Map ", containerID) } - dataItem := DataItem{ - ID: stringMap["Id"], - LogEntry: stringMap["LogEntry"], - LogEntrySource: stringMap["LogEntrySource"], - LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], - LogEntryTimeOfCommand: start.Format(time.RFC3339), - SourceSystem: stringMap["SourceSystem"], - Computer: Computer, - Image: stringMap["Image"], - Name: stringMap["Name"], + ID: stringMap["Id"], + LogEntry: stringMap["LogEntry"], + LogEntrySource: stringMap["LogEntrySource"], + LogEntryTimeStamp: stringMap["LogEntryTimeStamp"], + LogEntryTimeOfCommand: start.Format(time.RFC3339), + SourceSystem: stringMap["SourceSystem"], + Computer: Computer, + Image: stringMap["Image"], + Name: stringMap["Name"], } 
dataItems = append(dataItems, dataItem) loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) - if e!= nil { + if e != nil { message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) Log(message) SendException(message) From 22649bad0090c05eb809f0521d9222b514084b9b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 30 Jan 2019 15:50:28 -0800 Subject: [PATCH 55/88] changes for container node inventory fixed type (#186) --- installer/conf/kube.conf | 4 ++-- source/code/plugin/in_kube_nodes.rb | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 164865022..d0ef0517d 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -119,8 +119,8 @@ max_retry_wait 9m - - type out_oms_api + + type out_oms log_level debug buffer_chunk_limit 20m buffer_type file diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index a6908fc99..2e48e3f1f 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -6,7 +6,7 @@ module Fluent class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) - @@ContainerNodeInventoryTag = 'oms.api.ContainerNodeInventory' + @@ContainerNodeInventoryTag = 'oms.containerinsights.ContainerNodeInventory' def initialize super @@ -109,7 +109,12 @@ def enumerate dockerVersion.slice! "docker://" containerNodeInventoryRecord['DockerVersion'] = dockerVersion # ContainerNodeInventory data for docker version and operating system. 
- containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryRecord) if containerNodeInventoryRecord + containerNodeInventoryWrapper = { + "DataType"=>"CONTAINER_NODE_INVENTORY_BLOB", + "IPName"=>"ContainerInsights", + "DataItems"=>[containerNodeInventoryRecord.each{|k,v| containerNodeInventoryRecord[k]=v}] + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper wrapper = { "DataType"=>"KUBE_NODE_INVENTORY_BLOB", From 61e2eaffe3e60b51d83459a494435f3dd6002821 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 13 Feb 2019 11:38:07 -0800 Subject: [PATCH 56/88] Fix for mooncake (disable telemetry optionally) (#191) * disable telemetry option * fix a typo --- source/code/go/src/plugins/telemetry.go | 5 +++++ source/code/plugin/ApplicationInsightsUtility.rb | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 82f970d3a..a64ca2218 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -120,6 +120,11 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } TelemetryClient = appinsights.NewTelemetryClient(string(decIkey)) + telemetryOffSwitch := os.Getenv("DISABLE_TELEMETRY") + if strings.Compare(strings.ToLower(telemetryOffSwitch), "true") == 0 { + Log("Appinsights telemetry is disabled \n") + TelemetryClient.SetIsEnabled(false) + } CommonProperties = make(map[string]string) CommonProperties["Computer"] = Computer diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 27660d708..683be0db4 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -61,9 +61,16 @@ def initializeUtility() @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] 
encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] - if !encodedAppInsightsKey.nil? + + #Check if telemetry is turned off + telemetryOffSwitch = ENV['DISABLE_TELEMETRY'] + if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase + $log.warn("AppInsightsUtility: Telemetry is disabled") + @@Tc = ApplicationInsights::TelemetryClient.new + elsif !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey + end rescue => errorStr $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") From 30dff41106981b9855a89db9227ef9fccbea0158 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Fri, 15 Feb 2019 14:27:33 -0800 Subject: [PATCH 57/88] CustomMetrics to ci_feature (#193) Custom Metrics changes to ci_feature --- installer/conf/container.conf | 24 ++ installer/conf/kube.conf | 25 +- installer/datafiles/base_container.data | 14 + source/code/go/src/plugins/oms.go | 2 +- .../code/plugin/ApplicationInsightsUtility.rb | 19 +- source/code/plugin/CustomMetricsUtils.rb | 26 ++ source/code/plugin/filter_cadvisor2mdm.rb | 215 ++++++++++++++++ source/code/plugin/filter_inventory2mdm.rb | 235 +++++++++++++++++ source/code/plugin/in_cadvisor_perf.rb | 2 + source/code/plugin/in_kube_nodes.rb | 2 + source/code/plugin/in_kube_podinventory.rb | 3 + source/code/plugin/out_mdm.rb | 239 ++++++++++++++++++ 12 files changed, 802 insertions(+), 4 deletions(-) create mode 100644 source/code/plugin/CustomMetricsUtils.rb create mode 100644 source/code/plugin/filter_cadvisor2mdm.rb create mode 100644 source/code/plugin/filter_inventory2mdm.rb create mode 100644 source/code/plugin/out_mdm.rb diff --git a/installer/conf/container.conf b/installer/conf/container.conf index 091753230..f41bd6f98 100755 --- a/installer/conf/container.conf +++ b/installer/conf/container.conf @@ -23,6 +23,14 @@ 
log_level debug +#custom_metrics_mdm filter plugin + + type filter_cadvisor2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes + log_level info + + type out_oms log_level debug @@ -52,3 +60,19 @@ retry_wait 30s max_retry_wait 9m + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index d0ef0517d..50a88295e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -47,6 +47,12 @@ log_level debug + + type filter_inventory2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + log_level info + + type out_oms log_level debug @@ -145,4 +151,21 @@ retry_limit 10 retry_wait 30s max_retry_wait 9m - \ No newline at end of file + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path /var/opt/microsoft/omsagent/6bb1e963-b08c-43a8-b708-1628305e964a/state/out_mdm_*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + + diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 7181929e2..c263aa505 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -36,6 +36,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/code/plugin/in_cadvisor_perf.rb; 644; root; root 
/opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/code/plugin/filter_inventory2mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/CustomMetricsUtils.rb; source/code/plugin/CustomMetricsUtils.rb; 644; root; root + /opt/microsoft/omsagent/plugin/ApplicationInsightsUtility.rb; source/code/plugin/ApplicationInsightsUtility.rb; 644; root; root /opt/microsoft/omsagent/plugin/ContainerInventoryState.rb; source/code/plugin/ContainerInventoryState.rb; 644; root; root @@ -43,6 +46,9 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/DockerApiRestHelper.rb; source/code/plugin/DockerApiRestHelper.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_containerinventory.rb; source/code/plugin/in_containerinventory.rb; 644; root; root +/opt/microsoft/omsagent/plugin/out_mdm.rb; source/code/plugin/out_mdm.rb; 644; root; root +/opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb; source/code/plugin/filter_cadvisor2mdm.rb; 644; root; root + /opt/microsoft/omsagent/plugin/lib/application_insights/version.rb; source/code/plugin/lib/application_insights/version.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/rack/track_request.rb; source/code/plugin/lib/application_insights/rack/track_request.rb; 644; root; root /opt/microsoft/omsagent/plugin/lib/application_insights/unhandled_exception.rb; source/code/plugin/lib/application_insights/unhandled_exception.rb; 644; root; root @@ -170,6 +176,14 @@ touch /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt chmod 666 /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt +touch /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log +chmod 666 
/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log + +touch /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log +chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log + mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 49e91f87f..27ae6df5c 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -140,7 +140,7 @@ func updateContainerImageNameMaps() { listOptions := metav1.ListOptions{} listOptions.FieldSelector = fmt.Sprintf("spec.nodeName=%s", Computer) pods, err := ClientSet.CoreV1().Pods("").List(listOptions) - + if err != nil { message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error()) Log(message) diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 683be0db4..5c5e92a6c 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -98,7 +98,7 @@ def sendHeartBeatEvent(pluginName) end end - def sendCustomMetric(pluginName, properties) + def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) begin if !(@@Tc.nil?) @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], @@ -112,6 +112,21 @@ def sendCustomMetric(pluginName, properties) end end + def sendCustomEvent(eventName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? 
+ initializeUtility() + end + if !(@@Tc.nil?) + @@Tc.track_event eventName, :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Custom Event #{eventName} sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + end + end + def sendExceptionTelemetry(errorStr) begin if @@CustomProperties.empty? || @@CustomProperties.nil? @@ -139,7 +154,7 @@ def sendTelemetry(pluginName, properties) end @@CustomProperties['Computer'] = properties['Computer'] sendHeartBeatEvent(pluginName) - sendCustomMetric(pluginName, properties) + sendLastProcessedContainerInventoryCountMetric(pluginName, properties) rescue => errorStr $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") end diff --git a/source/code/plugin/CustomMetricsUtils.rb b/source/code/plugin/CustomMetricsUtils.rb new file mode 100644 index 000000000..d06c9ad91 --- /dev/null +++ b/source/code/plugin/CustomMetricsUtils.rb @@ -0,0 +1,26 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class CustomMetricsUtils + def initialize + end + + class << self + def check_custom_metrics_availability(custom_metric_regions) + aks_region = ENV['AKS_REGION'] + aks_resource_id = ENV['AKS_RESOURCE_ID'] + if aks_region.to_s.empty? && aks_resource_id.to_s.empty? + false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. 
Only ACS_RESOURCE_NAME is set + end + + custom_metrics_regions_arr = custom_metric_regions.split(',') + custom_metrics_regions_hash = custom_metrics_regions_arr.map {|x| [x.downcase,true]}.to_h + + if custom_metrics_regions_hash.key?(aks_region.downcase) + true + else + false + end + end + end +end \ No newline at end of file diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb new file mode 100644 index 000000000..85f9f688e --- /dev/null +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -0,0 +1,215 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. + +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'CustomMetricsUtils' + + class CAdvisor2MdmFilter < Filter + Fluent::Plugin.register_filter('filter_cadvisor2mdm', self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log' + config_param :custom_metrics_azure_regions, :string + config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes' + + @@cpu_usage_milli_cores = 'cpuUsageMillicores' + @@cpu_usage_nano_cores = 'cpuusagenanocores' + @@object_name_k8s_node = 'K8SNode' + @@hostName = (OMS::Common.get_hostname) + @@custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "Insights.Container/nodes", + "dimNames": [ + "host" + ], + "series": [ + { + "dimValues": [ + "%{hostvalue}" + ], + "min": %{metricminvalue}, + "max": %{metricmaxvalue}, + "sum": %{metricsumvalue}, + "count": 1 + } + ] + } + } + }' + + @@metric_name_metric_percentage_name_hash = { + @@cpu_usage_milli_cores => "cpuUsagePercentage", + "memoryRssBytes" => "memoryRssPercentage", + "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" + } + + @process_incoming_stream = true + 
@metrics_to_collect_hash = {} + + def initialize + super + end + + def configure(conf) + super + @log = nil + + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.debug {'Starting filter_cadvisor2mdm plugin'} + end + end + + def start + super + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) + @metrics_to_collect_hash = build_metrics_hash + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + + # initialize cpu and memory limit + if @process_incoming_stream + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + ensure_cpu_memory_capacity_set + end + end + + def build_metrics_hash + @log.debug "Building Hash of Metrics to Collect" + metrics_to_collect_arr = @metrics_to_collect.split(',').map(&:strip) + metrics_hash = metrics_to_collect_arr.map {|x| [x.downcase,true]}.to_h + @log.info "Metrics Collected : #{metrics_hash}" + return metrics_hash + end + + def shutdown + super + end + + def filter(tag, time, record) + begin + if @process_incoming_stream + object_name = record['DataItems'][0]['ObjectName'] + counter_name = record['DataItems'][0]['Collections'][0]['CounterName'] + if object_name == @@object_name_k8s_node && @metrics_to_collect_hash.key?(counter_name.downcase) + percentage_metric_value = 0.0 + + # Compute and send % CPU and Memory + metric_value = record['DataItems'][0]['Collections'][0]['Value'] + if counter_name.downcase == @@cpu_usage_nano_cores + metric_name = @@cpu_usage_milli_cores + metric_value = metric_value/1000000 + if @cpu_capacity != 0.0 + percentage_metric_value = (metric_value*1000000)*100/@cpu_capacity + end + end + + if counter_name.start_with?("memory") + metric_name = counter_name + if @memory_capacity != 0.0 + percentage_metric_value = metric_value*100/@memory_capacity + end + end + return get_metric_records(record, metric_name, metric_value, percentage_metric_value) + else + return [] + end + else + return [] + end 
+ rescue Exception => e + @log.info "Error processing cadvisor record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [] + end + end + + def ensure_cpu_memory_capacity_set + + @log.info "ensure_cpu_memory_capacity_set @cpu_capacity #{@cpu_capacity} @memory_capacity #{@memory_capacity}" + if @cpu_capacity != 0.0 && @memory_capacity != 0.0 + @log.info "CPU And Memory Capacity are already set" + return + end + + begin + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes?fieldSelector=metadata.name%3D#{@@hostName}").body) + rescue Exception => e + @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + if !nodeInventory.nil? + cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") + if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? + @cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] + @log.info "CPU Limit #{@cpu_capacity}" + else + @log.info "Error getting cpu_capacity" + end + memory_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes") + if !memory_capacity_json.nil? && !memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? 
+ @memory_capacity = memory_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] + @log.info "Memory Limit #{@memory_capacity}" + else + @log.info "Error getting memory_capacity" + end + end + end + + def get_metric_records(record, metric_name, metric_value, percentage_metric_value) + records = [] + custommetricrecord = @@custom_metrics_template % { + timestamp: record['DataItems'][0]['Timestamp'], + metricName: metric_name, + hostvalue: record['DataItems'][0]['Host'], + objectnamevalue: record['DataItems'][0]['ObjectName'], + instancenamevalue: record['DataItems'][0]['InstanceName'], + metricminvalue: metric_value, + metricmaxvalue: metric_value, + metricsumvalue: metric_value + } + records.push(JSON.parse(custommetricrecord)) + + if !percentage_metric_value.nil? + additional_record = @@custom_metrics_template % { + timestamp: record['DataItems'][0]['Timestamp'], + metricName: @@metric_name_metric_percentage_name_hash[metric_name], + hostvalue: record['DataItems'][0]['Host'], + objectnamevalue: record['DataItems'][0]['ObjectName'], + instancenamevalue: record['DataItems'][0]['InstanceName'], + metricminvalue: percentage_metric_value, + metricmaxvalue: percentage_metric_value, + metricsumvalue: percentage_metric_value + } + records.push(JSON.parse(additional_record)) + end + @log.info "Metric Name: #{metric_name} Metric Value: #{metric_value} Percentage Metric Value: #{percentage_metric_value}" + return records + end + + + def filter_stream(tag, es) + new_es = MultiEventStream.new + ensure_cpu_memory_capacity_set + es.each { |time, record| + begin + filtered_records = filter(tag, time, record) + filtered_records.each {|filtered_record| + new_es.add(time, filtered_record) if filtered_record + } if filtered_records + rescue => e + router.emit_error_event(tag, time, record, e) + end + } + new_es + end + end +end diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb new file mode 100644 index 000000000..d9864bc1a 
--- /dev/null +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -0,0 +1,235 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. + +# frozen_string_literal: true + +module Fluent + require 'logger' + require 'json' + require_relative 'oms_common' + require_relative 'CustomMetricsUtils' + + class Inventory2MdmFilter < Filter + Fluent::Plugin.register_filter('filter_inventory2mdm', self) + + config_param :enable_log, :integer, :default => 0 + config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log' + config_param :custom_metrics_azure_regions, :string + + @@node_count_metric_name = 'nodesCount' + @@pod_count_metric_name = 'podCount' + @@pod_inventory_tag = 'mdm.kubepodinventory' + @@node_inventory_tag = 'mdm.kubenodeinventory' + @@node_status_ready = 'Ready' + @@node_status_not_ready = 'NotReady' + + @@node_inventory_custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/nodes", + "dimNames": [ + "status" + ], + "series": [ + { + "dimValues": [ + "%{statusValue}" + ], + "min": %{node_status_count}, + "max": %{node_status_count}, + "sum": %{node_status_count}, + "count": 1 + } + ] + } + } + }' + + @@pod_inventory_custom_metrics_template = ' + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "insights.container/pods", + "dimNames": [ + "phase", + "namespace", + "node", + "controllerName" + ], + "series": [ + { + "dimValues": [ + "%{phaseDimValue}", + "%{namespaceDimValue}", + "%{nodeDimValue}", + "%{controllerNameDimValue}" + ], + "min": %{podCountMetricValue}, + "max": %{podCountMetricValue}, + "sum": %{podCountMetricValue}, + "count": 1 + } + ] + } + } + }' + + @process_incoming_stream = true + + def initialize + super + end + + def configure(conf) + super + @log = nil + + if @enable_log + @log = Logger.new(@log_path, 'weekly') + @log.debug {'Starting 
filter_inventory2mdm plugin'} + end + end + + def start + super + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + end + + def shutdown + super + end + + def process_node_inventory_records(es) + timestamp = DateTime.now + + begin + node_ready_count = 0 + node_not_ready_count = 0 + records = [] + + es.each{|time,record| + begin + timestamp = record['DataItems'][0]['CollectionTime'] + node_status = record['DataItems'][0]['Status'] + if node_status.downcase == @@node_status_ready.downcase + node_ready_count = node_ready_count+1 + else + node_not_ready_count = node_not_ready_count + 1 + end + rescue => e + end + } + + ready_record = @@node_inventory_custom_metrics_template % { + timestamp: timestamp, + metricName: @@node_count_metric_name, + statusValue: @@node_status_ready, + node_status_count: node_ready_count + } + records.push(JSON.parse(ready_record)) + + not_ready_record = @@node_inventory_custom_metrics_template % { + timestamp: timestamp, + metricName: @@node_count_metric_name, + statusValue: @@node_status_not_ready, + node_status_count: node_not_ready_count + } + records.push(JSON.parse(not_ready_record)) + rescue Exception => e + @log.info "Error processing node inventory records Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [],timestamp + end + return records,timestamp + end + + def process_pod_inventory_records(es) + timestamp = DateTime.now + pod_count_hash = Hash.new + + begin + records = [] + es.each{|time,record| + + timestamp = record['DataItems'][0]['CollectionTime'] + podPhaseDimValue = record['DataItems'][0]['PodStatus'] + podNamespaceDimValue = record['DataItems'][0]['Namespace'] + podControllerNameDimValue = record['DataItems'][0]['ControllerName'] + podNodeDimValue = record['DataItems'][0]['Computer'] + + # 
group by distinct dimension values + pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') + + if pod_count_hash.key?(pod_key) + pod_count = pod_count_hash[pod_key] + pod_count = pod_count + 1 + pod_count_hash[pod_key] = pod_count + else + pod_count = 1 + pod_count_hash[pod_key] = pod_count + end + } + + pod_count_hash.each {|key, value| + + key_elements = key.split('~~') + if key_elements.length != 4 + next + end + + # get dimension values by key + podNodeDimValue = key_elements[0] + podNamespaceDimValue = key_elements[1] + podControllerNameDimValue = key_elements[2] + podPhaseDimValue = key_elements[3] + + record = @@pod_inventory_custom_metrics_template % { + timestamp: timestamp, + metricName: @@pod_count_metric_name, + phaseDimValue: podPhaseDimValue, + namespaceDimValue: podNamespaceDimValue, + nodeDimValue: podNodeDimValue, + controllerNameDimValue: podControllerNameDimValue, + podCountMetricValue: value + } + records.push(JSON.parse(record)) + } + rescue Exception => e + @log.info "Error processing pod inventory record Exception: #{e.class} Message: #{e.message}" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + return [],timestamp + end + return records, timestamp + end + + def filter_stream(tag, es) + new_es = MultiEventStream.new + filtered_records = [] + time = DateTime.now + begin + if @process_incoming_stream + @log.info 'Processing NODE inventory records in filter plugin to send to MDM' + if tag.downcase.start_with?(@@node_inventory_tag) + filtered_records, time = process_node_inventory_records(es) + elsif tag.downcase.start_with?(@@pod_inventory_tag) + @log.info 'Processing POD inventory records in filter plugin to send to MDM' + filtered_records, time = process_pod_inventory_records(es) + else + filtered_records = [] + end + end + filtered_records.each {|filtered_record| + new_es.add(time, filtered_record) if filtered_record + } if filtered_records + rescue => e + @log.info 
"Exception in filter_stream #{e}" + end + new_es + end + end +end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index 5b551f74e..a857aa6b9 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -18,6 +18,7 @@ def initialize config_param :run_interval, :time, :default => '1m' config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :mdmtag, :string, :default => "mdm.cadvisorperf" def configure (conf) super @@ -55,6 +56,7 @@ def enumerate() end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@mdmtag, eventStream) if eventStream @@istestvar = ENV['ISTEST'] if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 2e48e3f1f..ba1dacbe0 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -7,6 +7,7 @@ class Kube_nodeInventory_Input < Input Plugin.register_input('kubenodeinventory', self) @@ContainerNodeInventoryTag = 'oms.containerinsights.ContainerNodeInventory' + @@MDMKubeNodeInventoryTag = 'mdm.kubenodeinventory' def initialize super @@ -136,6 +137,7 @@ def enumerate end end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream if telemetrySent == true @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index eaf14b035..dee3df30b 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -6,6 +6,8 @@ module Fluent class Kube_PodInventory_Input 
< Input Plugin.register_input('kubepodinventory', self) + @@MDMKubePodInventoryTag = 'mdm.kubepodinventory' + def initialize super require 'yaml' @@ -208,6 +210,7 @@ def parse_and_emit_records(podInventory, serviceList) end end #podInventory block end router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream if telemetryFlush == true ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb new file mode 100644 index 000000000..2f36ea7d5 --- /dev/null +++ b/source/code/plugin/out_mdm.rb @@ -0,0 +1,239 @@ +module Fluent + + class OutputMDM < BufferedOutput + + config_param :retry_mdm_post_wait_minutes, :integer + + Plugin.register_output('out_mdm', self) + + def initialize + super + require 'net/http' + require 'net/https' + require 'uri' + require 'json' + require_relative 'KubernetesApiClient' + require_relative 'ApplicationInsightsUtility' + + @@token_resource_url = 'https://monitoring.azure.com/' + @@grant_type = 'client_credentials' + @@azure_json_path = '/etc/kubernetes/host/azure.json' + @@post_request_url_template = "https://%{aks_region}.monitoring.azure.com%{aks_resource_id}/metrics" + @@token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" + @@plugin_name = "AKSCustomMetricsMDM" + + @data_hash = {} + @token_url = nil + @http_client = nil + @token_expiry_time = Time.now + @cached_access_token = String.new + @last_post_attempt_time = Time.now + @first_post_attempt_made = false + end + + def configure(conf) + s = conf.add_element("secondary") + s["type"] = ChunkErrorHandler::SecondaryName + super + end + + def start + super + file = File.read(@@azure_json_path) + # Handle the case where the file read fails. Send Telemetry and exit the plugin? 
+ @data_hash = JSON.parse(file) + @token_url = @@token_url_template % {tenant_id: @data_hash['tenantId']} + @cached_access_token = get_access_token + aks_resource_id = ENV['AKS_RESOURCE_ID'] + aks_region = ENV['AKS_REGION'] + if aks_resource_id.to_s.empty? + @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " + raise Exception.new "Environment Variable AKS_RESOURCE_ID is not set!!" + end + if aks_region.to_s.empty? + @log.info "Environment Variable AKS_REGION is not set.. " + raise Exception.new "Environment Variable AKS_REGION is not set!!" + end + + @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} + @post_request_uri = URI.parse(@@post_request_url) + @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port) + @http_client.use_ssl = true + @log.info "POST Request url: #{@@post_request_url}" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {}) + end + + # get the access token only if the time to expiry is less than 5 minutes + def get_access_token + if @cached_access_token.to_s.empty? || (Time.now + 5*60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration + @log.info "Refreshing access token for out_mdm plugin.." 
+ token_uri = URI.parse(@token_url) + http_access_token = Net::HTTP.new(token_uri.host, token_uri.port) + http_access_token.use_ssl = true + token_request = Net::HTTP::Post.new(token_uri.request_uri) + token_request.set_form_data( + { + 'grant_type' => @@grant_type, + 'client_id' => @data_hash['aadClientId'], + 'client_secret' => @data_hash['aadClientSecret'], + 'resource' => @@token_resource_url + } + ) + + token_response = http_access_token.request(token_request) + # Handle the case where the response is not 200 + parsed_json = JSON.parse(token_response.body) + @token_expiry_time = Time.now + 59*60 # set the expiry time to be ~one hour from current time + @cached_access_token = parsed_json['access_token'] + end + @cached_access_token + end + + def write_status_file(success, message) + fn = '/var/opt/microsoft/omsagent/log/MDMIngestion.status' + status = '{ "operation": "MDMIngestion", "success": "%s", "message": "%s" }' % [success, message] + begin + File.open(fn,'w') { |file| file.write(status) } + rescue => e + @log.debug "Error:'#{e}'" + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + end + end + + # This method is called when an event reaches to Fluentd. + # Convert the event to a raw string. + def format(tag, time, record) + if record != {} + @log.trace "Buffering #{tag}" + return [tag, record].to_msgpack + else + return "" + end + end + + # This method is called every flush interval. Send the buffer chunk to MDM. + # 'chunk' is a buffer chunk that includes multiple formatted records + def write(chunk) + begin + if !@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60) + post_body = [] + chunk.msgpack_each {|(tag, record)| + post_body.push(record.to_json) + } + send_to_mdm post_body + else + @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. 
NO-OP" + end + rescue Exception => e + @log.info "Exception when writing to MDM: #{e}" + end + end + + def send_to_mdm(post_body) + begin + access_token = get_access_token + request = Net::HTTP::Post.new(@post_request_uri.request_uri) + request['Content-Type'] = "application/x-ndjson" + request['Authorization'] = "Bearer #{access_token}" + request.body = post_body.join("\n") + response = @http_client.request(request) + response.value # this throws for non 200 HTTP response code + @log.info "HTTP Post Response Code : #{response.code}" + ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMSendSuccessful", {}) + rescue Net::HTTPServerException => e + @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}" + @log.debug_backtrace(e.backtrace) + if !response.code.empty? && response.code == 403.to_s + @log.info "Response Code #{response.code} Updating @last_post_attempt_time" + @last_post_attempt_time = Time.now + @first_post_attempt_made = true + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + # Not raising exception, as that will cause retries to happen + else + @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" + raise e + end + rescue Errno::ETIMEDOUT => e + @log.info "Timed out when POSTing Metrics to MDM : #{e} Response: #{response}" + @log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + raise e + rescue Exception => e + @log.info "Exception POSTing Metrics to MDM : #{e} Response: #{response}" + @log.debug_backtrace(e.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) + raise e + end + end + private + + class ChunkErrorHandler + include Configurable + include PluginId + include PluginLoggerMixin + + SecondaryName = "__ChunkErrorHandler__" + + Plugin.register_output(SecondaryName, self) + + def initialize + @router = nil + end + + def secondary_init(primary) + @error_handlers = create_error_handlers @router + end + + 
def start + # NOP + end + + def shutdown + # NOP + end + + def router=(r) + @router = r + end + + def write(chunk) + chunk.msgpack_each {|(tag, record)| + @error_handlers[tag].emit(record) + } + end + + private + + def create_error_handlers(router) + nop_handler = NopErrorHandler.new + Hash.new() { |hash, tag| + etag = OMS::Common.create_error_tag tag + hash[tag] = router.match?(etag) ? + ErrorHandler.new(router, etag) : + nop_handler + } + end + + class ErrorHandler + def initialize(router, etag) + @router = router + @etag = etag + end + + def emit(record) + @router.emit(@etag, Fluent::Engine.now, record) + end + end + + class NopErrorHandler + def emit(record) + # NOP + end + end + + end + + end # class OutputMDM + +end # module Fluent + From f1b0cd2a1945057340dc48f85ea685b3a5a69b08 Mon Sep 17 00:00:00 2001 From: Kaveesh Dubey Date: Thu, 24 Jan 2019 12:12:01 -0800 Subject: [PATCH 58/88] add ContainerNotRunning column to KubePodInventory --- source/code/plugin/in_kube_podinventory.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index dee3df30b..9b8ee1fb8 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -173,6 +173,7 @@ def parse_and_emit_records(podInventory, serviceList) containerRestartCount = container['restartCount'] record['ContainerRestartCount'] = containerRestartCount containerStatus = container['state'] + record['ContainerNotRunningReason'] = '' # state is of the following form , so just picking up the first key name # "state": { # "waiting": { @@ -190,6 +191,10 @@ def parse_and_emit_records(podInventory, serviceList) #Picking up both container and node start time from cAdvisor to be consistent if containerStatus.keys[0] == "running" record['ContainerCreationTimeStamp'] = container['state']['running']['startedAt'] + else + if !containerStatus[containerStatus.keys[0]]['reason'].nil? 
&& !containerStatus[containerStatus.keys[0]]['reason'].empty? + record['ContainerNotRunningReason'] = containerStatus[containerStatus.keys[0]]['reason'] + end end podRestartCount += containerRestartCount records.push(record.dup) From 616a803a4c962511a2a27e3f8382b8b82c09362c Mon Sep 17 00:00:00 2001 From: Kaveesh Dubey Date: Thu, 24 Jan 2019 13:52:38 -0800 Subject: [PATCH 59/88] merge pr feedback: update name to ContainerStatusReason --- source/code/plugin/in_kube_podinventory.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 9b8ee1fb8..3d026b05f 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -173,7 +173,7 @@ def parse_and_emit_records(podInventory, serviceList) containerRestartCount = container['restartCount'] record['ContainerRestartCount'] = containerRestartCount containerStatus = container['state'] - record['ContainerNotRunningReason'] = '' + record['ContainerStatusReason'] = '' # state is of the following form , so just picking up the first key name # "state": { # "waiting": { @@ -193,7 +193,7 @@ def parse_and_emit_records(podInventory, serviceList) record['ContainerCreationTimeStamp'] = container['state']['running']['startedAt'] else if !containerStatus[containerStatus.keys[0]]['reason'].nil? && !containerStatus[containerStatus.keys[0]]['reason'].empty? 
- record['ContainerNotRunningReason'] = containerStatus[containerStatus.keys[0]]['reason'] + record['ContainerStatusReason'] = containerStatus[containerStatus.keys[0]]['reason'] end end podRestartCount += containerRestartCount From c33ca34233f9adbe02b55c36e7148258041f997d Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Tue, 19 Feb 2019 13:10:03 -0800 Subject: [PATCH 60/88] Zero Fill for Missing Pod Phases, Change Namespace Dimension to Kubernetes namespace, as it might be confused with metrics namespace in Metrics Explorer (#194) * Zero Fill for Pod Counts by Phase * Change namespace dimension to Kubernetes namespace --- source/code/plugin/filter_inventory2mdm.rb | 31 +++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index d9864bc1a..8aaa5ff01 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -56,7 +56,7 @@ class Inventory2MdmFilter < Filter "namespace": "insights.container/pods", "dimNames": [ "phase", - "namespace", + "Kubernetes namespace", "node", "controllerName" ], @@ -77,7 +77,9 @@ class Inventory2MdmFilter < Filter } } }' - + + @@pod_phase_values = ['Running', 'Pending', 'Succeeded', 'Failed', 'Unknown'] + @process_incoming_stream = true def initialize @@ -151,7 +153,7 @@ def process_node_inventory_records(es) def process_pod_inventory_records(es) timestamp = DateTime.now pod_count_hash = Hash.new - + no_phase_dim_values_hash = Hash.new begin records = [] es.each{|time,record| @@ -173,6 +175,29 @@ def process_pod_inventory_records(es) pod_count = 1 pod_count_hash[pod_key] = pod_count end + + # Collect all possible combinations of dimension values other than pod phase + key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') + if no_phase_dim_values_hash.key?(key_without_phase_dim_value) + @log.info 
"#{key_without_phase_dim_value} already present in #{no_phase_dim_values_hash}" + next + else + @log.info "Adding #{key_without_phase_dim_value} to #{no_phase_dim_values_hash}" + no_phase_dim_values_hash[key_without_phase_dim_value] = true + end + } + + # generate all possible values of non_phase_dim_values X pod Phases and zero-fill the ones that are not already present + no_phase_dim_values_hash.each {|key, value| + @@pod_phase_values.each{|phase| + pod_key = [key, phase].join('~~') + if !pod_count_hash.key?(pod_key) + pod_count_hash[pod_key] = 0 + @log.info "Zero filled #{pod_key}" + else + next + end + } } pod_count_hash.each {|key, value| From 2651750f04932a808a214f84cc7a5742fd075591 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 20 Feb 2019 13:31:23 -0800 Subject: [PATCH 61/88] No Retries for non 404 4xx errors (#196) --- source/code/plugin/out_mdm.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 2f36ea7d5..6bde98534 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -126,6 +126,7 @@ def write(chunk) end rescue Exception => e @log.info "Exception when writing to MDM: #{e}" + raise e end end @@ -149,7 +150,11 @@ def send_to_mdm(post_body) @first_post_attempt_made = true ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) # Not raising exception, as that will cause retries to happen - else + elsif !response.code.empty? 
&& response.code.start_with?('4') + # Log 400 errors and continue + @log.info "Non-retryable HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" + else + # raise if the response code is non-400 @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" raise e end From 195bc3382342c2dfe1f7bd28e623486553b5d59f Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 14:51:56 -0800 Subject: [PATCH 62/88] Update agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 29c98bdf1..863e2d86a 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod01092019 + AgentVersion ciprod01202019 From 59d6c61e6a5d0841333dca6a685fd0e633b9b53c Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:51:09 -0800 Subject: [PATCH 63/88] Update readme for upcoming (ciprod01202019) release --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 099a065e8..8b5898e92 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,22 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) +### 01/20/2019 - Version microsoft/oms:ciprod01202019 +- Container logs enrichment optimization +- Get container meta data only for containers in current node (vs cluster before) +- Update fluent bit 0.13.7 => 0.14.4 +- This fixes the escaping issue in the container logs +- Mooncake cloud support for agent +- Ability to disable agent telemetry +- Ability to onboard and ingest to mooncake cloud +- Add & populate 'ContainerStatusReason' column to KubePodInventory +- Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) +- Cpuusagenanocores & % +- MemoryWorkingsetBytes & % +- MemoryRssBytes & % +- Podcount by node, phase & namespace +- Nodecount + ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) - Persist fluentbit state between container restarts From 0189bc0a7a8cc5bd1f657baea8a12895e5861ffe Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:53:35 -0800 Subject: [PATCH 64/88] fix readme formatting --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 8b5898e92..14c07e948 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/20/2019 - Version microsoft/oms:ciprod01202019 - Container logs enrichment optimization -- Get container meta data only for containers in current node (vs cluster before) +..*Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 -- This fixes the escaping issue in the container logs +..*This fixes the escaping issue in the container logs - Mooncake cloud support for agent -- Ability to disable agent telemetry -- Ability to onboard and ingest to mooncake cloud +..*Ability to disable agent telemetry +..*Ability to onboard and ingest to mooncake 
cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) -- Cpuusagenanocores & % -- MemoryWorkingsetBytes & % -- MemoryRssBytes & % -- Podcount by node, phase & namespace -- Nodecount +..*Cpuusagenanocores & % +..*MemoryWorkingsetBytes & % +..*MemoryRssBytes & % +..*Podcount by node, phase & namespace +..*Nodecount ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From 8221d2dd849427a08c0dcd6781cd050a8380c551 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:54:08 -0800 Subject: [PATCH 65/88] fix formatting for readme --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 14c07e948..1a4506f1e 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/20/2019 - Version microsoft/oms:ciprod01202019 - Container logs enrichment optimization -..*Get container meta data only for containers in current node (vs cluster before) +..* Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 -..*This fixes the escaping issue in the container logs +..* This fixes the escaping issue in the container logs - Mooncake cloud support for agent -..*Ability to disable agent telemetry -..*Ability to onboard and ingest to mooncake cloud +..* Ability to disable agent telemetry +..* Ability to onboard and ingest to mooncake cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) -..*Cpuusagenanocores & % -..*MemoryWorkingsetBytes & % -..*MemoryRssBytes & % -..*Podcount by node, phase & namespace -..*Nodecount +..* Cpuusagenanocores & % +..* MemoryWorkingsetBytes & % +..* MemoryRssBytes & % +..* Podcount by node, phase & namespace +..* Nodecount 
### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From 30aa305a0546474d55889ea63c7ab8ef84ae9dca Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:57:17 -0800 Subject: [PATCH 66/88] fix formatting for readme --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1a4506f1e..ab621104a 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ Note : The agent version(s) below has dates (ciprod), which indicate t ### 01/20/2019 - Version microsoft/oms:ciprod01202019 - Container logs enrichment optimization -..* Get container meta data only for containers in current node (vs cluster before) + * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 -..* This fixes the escaping issue in the container logs + * This fixes the escaping issue in the container logs - Mooncake cloud support for agent -..* Ability to disable agent telemetry -..* Ability to onboard and ingest to mooncake cloud + * Ability to disable agent telemetry + * Ability to onboard and ingest to mooncake cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) -..* Cpuusagenanocores & % -..* MemoryWorkingsetBytes & % -..* MemoryRssBytes & % -..* Podcount by node, phase & namespace -..* Nodecount + * Cpuusagenanocores & % + * MemoryWorkingsetBytes & % + * MemoryRssBytes & % + * Podcount by node, phase & namespace + * Nodecount ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From f401116124985b1c24f56557f957f00da423d6cd Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 15:59:29 -0800 Subject: [PATCH 67/88] fix readme --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab621104a..125aec3bb 100644 
--- a/README.md +++ b/README.md @@ -16,16 +16,16 @@ Note : The agent version(s) below has dates (ciprod), which indicate t * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 * This fixes the escaping issue in the container logs -- Mooncake cloud support for agent +- Mooncake cloud support for agent (AKS only) * Ability to disable agent telemetry * Ability to onboard and ingest to mooncake cloud - Add & populate 'ContainerStatusReason' column to KubePodInventory - Alertable (custom) metrics (to AzureMonitor - only for AKS clusters) - * Cpuusagenanocores & % - * MemoryWorkingsetBytes & % - * MemoryRssBytes & % - * Podcount by node, phase & namespace - * Nodecount + * Cpuusagenanocores & % metric + * MemoryWorkingsetBytes & % metric + * MemoryRssBytes & % metric + * Podcount by node, phase & namespace metric + * Nodecount metric ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From a2f45afdac70173c994d73cd88ba34b20cd817d9 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 16:13:57 -0800 Subject: [PATCH 68/88] fix readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 125aec3bb..4313de5c0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 01/20/2019 - Version microsoft/oms:ciprod01202019 +### 01/20/2019 - Version microsoft/oms:ciprod02202019 - Container logs enrichment optimization * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 From 759dbb57e1472df8476ad7acfd8fbc9231207e3a Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 16:14:48 -0800 Subject: [PATCH 69/88] fix agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 863e2d86a..467489d1c 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod01202019 + AgentVersion ciprod02202019 From 7956f40d075476dc85633b53d72ed4eb8dfdc303 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 20 Feb 2019 17:16:25 -0800 Subject: [PATCH 70/88] fix date in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4313de5c0..59faf7e4d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. 
Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 01/20/2019 - Version microsoft/oms:ciprod02202019 +### 02/20/2019 - Version microsoft/oms:ciprod02202019 - Container logs enrichment optimization * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 From ee056568eee328b2d37a0d7a75e1ccec370f1729 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 21 Feb 2019 09:15:08 -0800 Subject: [PATCH 71/88] update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 59faf7e4d..b8d08b05a 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Note : The agent version(s) below has dates (ciprod), which indicate t * MemoryRssBytes & % metric * Podcount by node, phase & namespace metric * Nodecount metric +- ContainerNodeInventory_CL to fixed type ### 01/09/2018 - Version microsoft/oms:ciprod01092019 - Omsagent - 1.8.1.256 (nov 2018 release) From 2abcf67413b7c3fcbc8d1cd80511e1566fc124ba Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Thu, 21 Feb 2019 12:56:09 -0800 Subject: [PATCH 72/88] Restart logs every 10MB instead of weekly (#198) * Rotate logs every 10MB instead of weekly * Removing some logging, fixed log rotation --- source/code/plugin/filter_cadvisor2mdm.rb | 3 +-- source/code/plugin/filter_inventory2mdm.rb | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index 85f9f688e..94f2107cc 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -63,7 +63,7 @@ def configure(conf) @log = nil if @enable_log - @log = Logger.new(@log_path, 'weekly') + @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_cadvisor2mdm plugin'} end end @@ -191,7 +191,6 @@ def get_metric_records(record, metric_name, metric_value, percentage_metric_valu 
} records.push(JSON.parse(additional_record)) end - @log.info "Metric Name: #{metric_name} Metric Value: #{metric_value} Percentage Metric Value: #{percentage_metric_value}" return records end diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 8aaa5ff01..84f12dd06 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -91,7 +91,7 @@ def configure(conf) @log = nil if @enable_log - @log = Logger.new(@log_path, 'weekly') + @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_inventory2mdm plugin'} end end @@ -179,10 +179,8 @@ def process_pod_inventory_records(es) # Collect all possible combinations of dimension values other than pod phase key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') if no_phase_dim_values_hash.key?(key_without_phase_dim_value) - @log.info "#{key_without_phase_dim_value} already present in #{no_phase_dim_values_hash}" next else - @log.info "Adding #{key_without_phase_dim_value} to #{no_phase_dim_values_hash}" no_phase_dim_values_hash[key_without_phase_dim_value] = true end } From 18c107c4678cbbc53f14829458e781cc3b07d2c3 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 21 Feb 2019 13:30:42 -0800 Subject: [PATCH 73/88] update agent version for telemetry --- installer/conf/td-agent-bit.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 467489d1c..974e8564a 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,5 +28,5 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod02202019 + AgentVersion ciprod02212019 From 14b2b87c15bd4d49e2e5982789a5ba2649b3fc32 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Thu, 21 Feb 2019 13:33:02 -0800 Subject: [PATCH 74/88] update readme --- README.md | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b8d08b05a..f72a16f1e 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) -### 02/20/2019 - Version microsoft/oms:ciprod02202019 +### 02/21/2019 - Version microsoft/oms:ciprod02212019 - Container logs enrichment optimization * Get container meta data only for containers in current node (vs cluster before) - Update fluent bit 0.13.7 => 0.14.4 From 5479dff7a93cc8f640412a90cac8523c283c201d Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Fri, 22 Feb 2019 11:44:15 -0800 Subject: [PATCH 75/88] Update kube.conf to use %STATE_DIR_WS% instead of hardcoded path --- installer/conf/kube.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 31a0778d3..454df6e91 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -159,7 +159,7 @@ num_threads 5 buffer_chunk_limit 20m buffer_type file - buffer_path /var/opt/microsoft/omsagent/6bb1e963-b08c-43a8-b708-1628305e964a/state/out_mdm_*.buffer + buffer_path %STATE_DIR_WS%/out_mdm_*.buffer buffer_queue_limit 20 buffer_queue_full_action drop_oldest_chunk flush_interval 20s @@ -167,4 +167,4 @@ retry_wait 30s max_retry_wait 9m retry_mdm_post_wait_minutes 60 - \ No newline at end of file + From cdded2ee004d2c72e09cb881448dfc4fde49332f Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 4 Mar 2019 15:38:18 -0800 Subject: [PATCH 76/88] Fix AKSEngine Crash (#200) --- source/code/plugin/CustomMetricsUtils.rb | 4 ++-- source/code/plugin/out_mdm.rb | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/source/code/plugin/CustomMetricsUtils.rb b/source/code/plugin/CustomMetricsUtils.rb index d06c9ad91..a19580630 100644 --- a/source/code/plugin/CustomMetricsUtils.rb 
+++ b/source/code/plugin/CustomMetricsUtils.rb @@ -9,8 +9,8 @@ class << self def check_custom_metrics_availability(custom_metric_regions) aks_region = ENV['AKS_REGION'] aks_resource_id = ENV['AKS_RESOURCE_ID'] - if aks_region.to_s.empty? && aks_resource_id.to_s.empty? - false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. Only ACS_RESOURCE_NAME is set + if aks_region.to_s.empty? || aks_resource_id.to_s.empty? + return false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. Only ACS_RESOURCE_NAME is set end custom_metrics_regions_arr = custom_metric_regions.split(',') diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 6bde98534..274f450fd 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -29,6 +29,7 @@ def initialize @cached_access_token = String.new @last_post_attempt_time = Time.now @first_post_attempt_made = false + @can_send_data_to_mdm = true end def configure(conf) @@ -39,7 +40,13 @@ def configure(conf) def start super - file = File.read(@@azure_json_path) + begin + file = File.read(@@azure_json_path) + rescue => e + @log.info "Unable to read file #{@@azure_json_path} #{e}" + @can_send_data_to_mdm = false + return + end # Handle the case where the file read fails. Send Telemetry and exit the plugin? @data_hash = JSON.parse(file) @token_url = @@token_url_template % {tenant_id: @data_hash['tenantId']} @@ -48,11 +55,13 @@ def start aks_region = ENV['AKS_REGION'] if aks_resource_id.to_s.empty? @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " - raise Exception.new "Environment Variable AKS_RESOURCE_ID is not set!!" + @can_send_data_to_mdm = false + return end if aks_region.to_s.empty? @log.info "Environment Variable AKS_REGION is not set.. " - raise Exception.new "Environment Variable AKS_REGION is not set!!" 
+ @can_send_data_to_mdm = false + return end @@post_request_url = @@post_request_url_template % {aks_region: aks_region, aks_resource_id: aks_resource_id} @@ -115,14 +124,18 @@ def format(tag, time, record) # 'chunk' is a buffer chunk that includes multiple formatted records def write(chunk) begin - if !@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60) + if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60)) && @can_send_data_to_mdm post_body = [] chunk.msgpack_each {|(tag, record)| post_body.push(record.to_json) } send_to_mdm post_body else - @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP" + if !@can_send_data_to_mdm + @log.info "Cannot send data to MDM since all required conditions were not met" + else + @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. 
NO-OP" + end end rescue Exception => e @log.info "Exception when writing to MDM: #{e}" From 57be1c4be9f3a6234a9aff130da2ef327c958d1c Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 12 Mar 2019 17:47:17 -0700 Subject: [PATCH 77/88] hotfix * close resp.Body * remove chatty logs * membuf=5m and ignore files not updated since 5 mins --- installer/conf/td-agent-bit.conf | 7 ++++--- source/code/go/src/plugins/oms.go | 11 ++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index f01857cd7..9175b68ce 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -10,16 +10,17 @@ Path /var/log/containers/*.log DB /var/log/omsagent-fblogs.db Parser docker - Mem_Buf_Limit 30m + Mem_Buf_Limit 5m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m [INPUT] Name tail Tag oms.container.log.flbplugin.* Path /var/log/containers/omsagent*.log DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db - Mem_Buf_Limit 30m + Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On @@ -28,6 +29,6 @@ EnableTelemetry true TelemetryPushIntervalSeconds 300 Match oms.container.log.* - AgentVersion ciprod02212019 + AgentVersion ciprod03122019 diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d913c6c32..36cf20273 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -246,16 +246,11 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if val, ok := imageIDMap[containerID]; ok { stringMap["Image"] = val - } else { - Log("ContainerId %s not present in Name Map ", containerID) - } + } if val, ok := nameIDMap[containerID]; ok { stringMap["Name"] = val - } else { - Log("ContainerId %s not present in Image Map ", containerID) - } - + } dataItem := DataItem{ ID: stringMap["Id"], @@ -319,6 +314,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { 
return output.FLB_RETRY } + defer resp.Body.Close() + numRecords := len(dataItems) Log("Successfully flushed %d records in %s", numRecords, elapsed) ContainerLogTelemetryMutex.Lock() From 940a6eb2c1adc215e0dccdc33579159a961f4b9a Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 12 Mar 2019 17:59:57 -0700 Subject: [PATCH 78/88] fix readme for new version --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f72a16f1e..0a0b9ce08 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,13 @@ additional questions or comments. ## Release History Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) + +### 03/12/2019 - Version microsoft/oms:ciprod03122019 +- Fix for closing response.Body in outoms +- Update Mem_Buf_Limit to 5m for fluentbit +- Tail only files that were modified since 5 minutes +- Remove some unwanted logs that are chatty in outoms +- Fix for MDM disablement for AKS-Engine ### 02/21/2019 - Version microsoft/oms:ciprod02212019 - Container logs enrichment optimization From 411582432119d9d2ace3b8f3b9b0a2aad12089c5 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Wed, 13 Mar 2019 11:25:12 -0700 Subject: [PATCH 79/88] Fix the pod count in mdm agent plugin (#203) --- source/code/plugin/filter_inventory2mdm.rb | 35 ++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/source/code/plugin/filter_inventory2mdm.rb b/source/code/plugin/filter_inventory2mdm.rb index 84f12dd06..553c857b7 100644 --- a/source/code/plugin/filter_inventory2mdm.rb +++ b/source/code/plugin/filter_inventory2mdm.rb @@ -154,20 +154,42 @@ def process_pod_inventory_records(es) timestamp = DateTime.now pod_count_hash = Hash.new no_phase_dim_values_hash = Hash.new + total_pod_count = 0 + pod_count_by_phase = {} + podUids = {} + record_count = 0 begin records = [] es.each{|time,record| - + record_count += 1 timestamp = 
record['DataItems'][0]['CollectionTime'] + podUid = record['DataItems'][0]['PodUid'] + + if podUids.key?(podUid) + #@log.info "pod with #{podUid} already counted" + next + end + + podUids[podUid] = true podPhaseDimValue = record['DataItems'][0]['PodStatus'] podNamespaceDimValue = record['DataItems'][0]['Namespace'] podControllerNameDimValue = record['DataItems'][0]['ControllerName'] podNodeDimValue = record['DataItems'][0]['Computer'] - + # group by distinct dimension values pod_key = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue, podPhaseDimValue].join('~~') - - if pod_count_hash.key?(pod_key) + + if pod_count_by_phase.key?(podPhaseDimValue) + phase_count = pod_count_by_phase[podPhaseDimValue] + phase_count += 1 + pod_count_by_phase[podPhaseDimValue] = phase_count + else + pod_count_by_phase[podPhaseDimValue] = 1 + end + + total_pod_count += 1 + + if pod_count_hash.key?(pod_key) pod_count = pod_count_hash[pod_key] pod_count = pod_count + 1 pod_count_hash[pod_key] = pod_count @@ -175,7 +197,7 @@ def process_pod_inventory_records(es) pod_count = 1 pod_count_hash[pod_key] = pod_count end - + # Collect all possible combinations of dimension values other than pod phase key_without_phase_dim_value = [podNodeDimValue, podNamespaceDimValue, podControllerNameDimValue].join('~~') if no_phase_dim_values_hash.key?(key_without_phase_dim_value) @@ -191,7 +213,7 @@ def process_pod_inventory_records(es) pod_key = [key, phase].join('~~') if !pod_count_hash.key?(pod_key) pod_count_hash[pod_key] = 0 - @log.info "Zero filled #{pod_key}" + #@log.info "Zero filled #{pod_key}" else next end @@ -227,6 +249,7 @@ def process_pod_inventory_records(es) ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) return [],timestamp end + @log.info "Record Count #{record_count} pod count = #{total_pod_count} Pod Count To Phase #{pod_count_by_phase} " return records, timestamp end From df2e64c19bc9e427c72ffe492375b598a8933bfe Mon Sep 17 00:00:00 2001 From: Vishwanath 
Date: Wed, 13 Mar 2019 11:27:48 -0700 Subject: [PATCH 80/88] Update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0a0b9ce08..916863dbf 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Note : The agent version(s) below has dates (ciprod), which indicate t - Tail only files that were modified since 5 minutes - Remove some unwanted logs that are chatty in outoms - Fix for MDM disablement for AKS-Engine +- Fix for Pod count metric (same as container count) in MDM ### 02/21/2019 - Version microsoft/oms:ciprod02212019 - Container logs enrichment optimization From 19c2bc7864a4aabade944c327101ddc789850059 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 13 Mar 2019 12:13:12 -0700 Subject: [PATCH 81/88] string freeze for out_mdm plugin --- source/code/plugin/out_mdm.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 274f450fd..93b32ef50 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -1,3 +1,6 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + module Fluent class OutputMDM < BufferedOutput From 69935b305ab3552bc8626c8f81a802ec559a31e4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Mon, 1 Apr 2019 11:09:27 -0700 Subject: [PATCH 82/88] Vishwa/resourcecentric (#208) * resourceid fix (for AKS only) * fix name --- source/code/go/src/plugins/oms.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 36cf20273..a1ca3d6ee 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -44,6 +44,10 @@ var ( Computer string // WorkspaceID log analytics workspace id WorkspaceID string + // ResourceID for resource-centric log analytics data + ResourceID string + // Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false) + 
ResourceCentric bool ) var ( @@ -294,6 +298,10 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } resp, err := HTTPClient.Do(req) elapsed := time.Since(start) @@ -377,6 +385,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] WorkspaceID = omsadminConf["WORKSPACE_ID"] + ResourceID = os.Getenv("customResourceId") + if len(ResourceID) > 0 { + ResourceCentric = true + Log("OMS ResourceId=%s",ResourceID) + } Log("OMSEndpoint %s", OMSEndpoint) // Initialize image,name map refresh ticker From 6953f50a62c7faade0db553e0839f137b252309b Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 1 Apr 2019 14:48:19 -0700 Subject: [PATCH 83/88] Rashmi/win nodepool - PR (#206) * changes for win nodes enumeration * changes * changes * changes * node cpu metric rate changes * container cpu rate * changes * changes * changes * changes * changes * changes to include in_win_cadvisor_perf.rb file * send containerinventoryheartbeatevent * changes * cahnges for mdm metrics * changes * cahnges * changes * container states * changes * changes * changes for env variables * changes * changes * changes * changes * delete comments * changes * mutex changes * changes * changes * changes * telemetry fix for docker version * removing hardcoded values for mdm * update docker version * telemetry for windows cadvisor timeouts * exeception key update to computer * PR comments --- installer/conf/kube.conf | 47 + installer/datafiles/base_container.data | 1 + .../code/plugin/ApplicationInsightsUtility.rb | 379 +++--- .../code/plugin/CAdvisorMetricsAPIClient.rb | 1020 ++++++++++------- source/code/plugin/KubernetesApiClient.rb | 
938 +++++++-------- source/code/plugin/in_cadvisor_perf.rb | 152 ++- source/code/plugin/in_containerinventory.rb | 179 ++- source/code/plugin/in_kube_nodes.rb | 319 +++--- source/code/plugin/in_kube_podinventory.rb | 397 ++++--- source/code/plugin/in_win_cadvisor_perf.rb | 120 ++ source/code/plugin/out_mdm.rb | 94 +- 11 files changed, 2096 insertions(+), 1550 deletions(-) create mode 100644 source/code/plugin/in_win_cadvisor_perf.rb diff --git a/installer/conf/kube.conf b/installer/conf/kube.conf index 454df6e91..0dfa3710e 100644 --- a/installer/conf/kube.conf +++ b/installer/conf/kube.conf @@ -47,12 +47,44 @@ log_level debug +#cadvisor perf- Windows nodes + + type wincadvisorperf + tag oms.api.wincadvisorperf + run_interval 60s + log_level debug + + type filter_inventory2mdm custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope log_level info +#custom_metrics_mdm filter plugin for perf data from windows nodes + + type filter_cadvisor2mdm + custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope + metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes + log_level info + + + + type out_mdm + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_mdm_cdvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + retry_mdm_post_wait_minutes 60 + + type out_oms log_level debug @@ -168,3 +200,18 @@ max_retry_wait 9m retry_mdm_post_wait_minutes 60 + + + type out_oms + log_level debug + num_threads 5 + buffer_chunk_limit 20m + buffer_type file + buffer_path %STATE_DIR_WS%/out_oms_api_wincadvisorperf*.buffer + buffer_queue_limit 20 + buffer_queue_full_action drop_oldest_chunk + flush_interval 20s + retry_limit 10 + retry_wait 30s + max_retry_wait 9m + \ No newline at end of file diff --git 
a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index c263aa505..9c4d563f8 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -34,6 +34,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/microsoft/omsagent/plugin/CAdvisorMetricsAPIClient.rb; source/code/plugin/CAdvisorMetricsAPIClient.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_perf.rb; source/code/plugin/in_kube_perf.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_cadvisor_perf.rb; source/code/plugin/in_cadvisor_perf.rb; 644; root; root +/opt/microsoft/omsagent/plugin/in_win_cadvisor_perf.rb; source/code/plugin/in_win_cadvisor_perf.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_services.rb; source/code/plugin/in_kube_services.rb; 644; root; root /opt/microsoft/omsagent/plugin/in_kube_nodes.rb; source/code/plugin/in_kube_nodes.rb; 644; root; root /opt/microsoft/omsagent/plugin/filter_inventory2mdm.rb; source/code/plugin/filter_inventory2mdm.rb; 644; root; root diff --git a/source/code/plugin/ApplicationInsightsUtility.rb b/source/code/plugin/ApplicationInsightsUtility.rb index 5c5e92a6c..5dc2bfab8 100644 --- a/source/code/plugin/ApplicationInsightsUtility.rb +++ b/source/code/plugin/ApplicationInsightsUtility.rb @@ -2,209 +2,222 @@ # frozen_string_literal: true class ApplicationInsightsUtility - require_relative 'lib/application_insights' - require_relative 'omslog' - require_relative 'DockerApiClient' - require_relative 'oms_common' - require 'json' - require 'base64' + require_relative "lib/application_insights" + require_relative "omslog" + require_relative "DockerApiClient" + require_relative "oms_common" + require "json" + require "base64" - @@HeartBeat = 'HeartBeatEvent' - @@Exception = 'ExceptionEvent' - @@AcsClusterType = 'ACS' - @@AksClusterType = 'AKS' - @OmsAdminFilePath = '/etc/opt/microsoft/omsagent/conf/omsadmin.conf' - @@EnvAcsResourceName = 'ACS_RESOURCE_NAME' - @@EnvAksRegion = 
'AKS_REGION' - @@EnvAgentVersion = 'AGENT_VERSION' - @@EnvApplicationInsightsKey = 'APPLICATIONINSIGHTS_AUTH' - @@EnvControllerType = 'CONTROLLER_TYPE' + @@HeartBeat = "HeartBeatEvent" + @@Exception = "ExceptionEvent" + @@AcsClusterType = "ACS" + @@AksClusterType = "AKS" + @OmsAdminFilePath = "/etc/opt/microsoft/omsagent/conf/omsadmin.conf" + @@EnvAcsResourceName = "ACS_RESOURCE_NAME" + @@EnvAksRegion = "AKS_REGION" + @@EnvAgentVersion = "AGENT_VERSION" + @@EnvApplicationInsightsKey = "APPLICATIONINSIGHTS_AUTH" + @@EnvControllerType = "CONTROLLER_TYPE" - @@CustomProperties = {} - @@Tc = nil - @@hostName = (OMS::Common.get_hostname) + @@CustomProperties = {} + @@Tc = nil + @@hostName = (OMS::Common.get_hostname) - def initialize - end + def initialize + end - class << self - #Set default properties for telemetry event - def initializeUtility() - begin - resourceInfo = ENV['AKS_RESOURCE_ID'] - if resourceInfo.nil? || resourceInfo.empty? - @@CustomProperties["ACSResourceName"] = ENV[@@EnvAcsResourceName] - @@CustomProperties["ClusterType"] = @@AcsClusterType - @@CustomProperties["SubscriptionID"] = "" - @@CustomProperties["ResourceGroupName"] = "" - @@CustomProperties["ClusterName"] = "" - @@CustomProperties["Region"] = "" - else - @@CustomProperties["AKS_RESOURCE_ID"] = resourceInfo - begin - splitStrings = resourceInfo.split('/') - subscriptionId = splitStrings[2] - resourceGroupName = splitStrings[4] - clusterName = splitStrings[8] - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: parsing AKS resourceId: #{resourceInfo}, error: #{errorStr}") - end - @@CustomProperties["ClusterType"] = @@AksClusterType - @@CustomProperties["SubscriptionID"] = subscriptionId - @@CustomProperties["ResourceGroupName"] = resourceGroupName - @@CustomProperties["ClusterName"] = clusterName - @@CustomProperties["Region"] = ENV[@@EnvAksRegion] - end + class << self + #Set default properties for telemetry event + def initializeUtility() + begin + resourceInfo = 
ENV["AKS_RESOURCE_ID"] + if resourceInfo.nil? || resourceInfo.empty? + @@CustomProperties["ACSResourceName"] = ENV[@@EnvAcsResourceName] + @@CustomProperties["ClusterType"] = @@AcsClusterType + @@CustomProperties["SubscriptionID"] = "" + @@CustomProperties["ResourceGroupName"] = "" + @@CustomProperties["ClusterName"] = "" + @@CustomProperties["Region"] = "" + else + @@CustomProperties["AKS_RESOURCE_ID"] = resourceInfo + begin + splitStrings = resourceInfo.split("/") + subscriptionId = splitStrings[2] + resourceGroupName = splitStrings[4] + clusterName = splitStrings[8] + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: parsing AKS resourceId: #{resourceInfo}, error: #{errorStr}") + end + @@CustomProperties["ClusterType"] = @@AksClusterType + @@CustomProperties["SubscriptionID"] = subscriptionId + @@CustomProperties["ResourceGroupName"] = resourceGroupName + @@CustomProperties["ClusterName"] = clusterName + @@CustomProperties["Region"] = ENV[@@EnvAksRegion] + end - getDockerInfo() - @@CustomProperties['WorkspaceID'] = getWorkspaceId - @@CustomProperties['AgentVersion'] = ENV[@@EnvAgentVersion] - @@CustomProperties['ControllerType'] = ENV[@@EnvControllerType] - encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] + #Commenting it for now from initilize method, we need to pivot all telemetry off of kubenode docker version + #getDockerInfo() + @@CustomProperties["WorkspaceID"] = getWorkspaceId + @@CustomProperties["AgentVersion"] = ENV[@@EnvAgentVersion] + @@CustomProperties["ControllerType"] = ENV[@@EnvControllerType] + encodedAppInsightsKey = ENV[@@EnvApplicationInsightsKey] - #Check if telemetry is turned off - telemetryOffSwitch = ENV['DISABLE_TELEMETRY'] - if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase - $log.warn("AppInsightsUtility: Telemetry is disabled") - @@Tc = ApplicationInsights::TelemetryClient.new - elsif !encodedAppInsightsKey.nil? 
- decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) - @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey - - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") - end + #Check if telemetry is turned off + telemetryOffSwitch = ENV["DISABLE_TELEMETRY"] + if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase + $log.warn("AppInsightsUtility: Telemetry is disabled") + @@Tc = ApplicationInsights::TelemetryClient.new + elsif !encodedAppInsightsKey.nil? + decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) + @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: initilizeUtility - error: #{errorStr}") + end + end - def getDockerInfo() - dockerInfo = DockerApiClient.dockerInfo - if (!dockerInfo.nil? && !dockerInfo.empty?) - @@CustomProperties['DockerVersion'] = dockerInfo['Version'] - @@CustomProperties['DockerApiVersion'] = dockerInfo['ApiVersion'] - end - end + def getDockerInfo() + dockerInfo = DockerApiClient.dockerInfo + if (!dockerInfo.nil? && !dockerInfo.empty?) + @@CustomProperties["DockerVersion"] = dockerInfo["Version"] + #@@CustomProperties["DockerApiVersion"] = dockerInfo["ApiVersion"] + end + end - def sendHeartBeatEvent(pluginName) - begin - eventName = pluginName + @@HeartBeat - if !(@@Tc.nil?) - @@Tc.track_event eventName , :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Heartbeat Telemetry sent successfully") - end - rescue =>errorStr - $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") - end + def sendHeartBeatEvent(pluginName) + begin + eventName = pluginName + @@HeartBeat + if !(@@Tc.nil?) 
+ @@Tc.track_event eventName, :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Heartbeat Telemetry sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendHeartBeatEvent - error: #{errorStr}") + end + end - def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) - begin - if !(@@Tc.nil?) - @@Tc.track_metric 'LastProcessedContainerInventoryCount', properties['ContainerCount'], - :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, - :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Container Count Telemetry sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") - end + def sendLastProcessedContainerInventoryCountMetric(pluginName, properties) + begin + if !(@@Tc.nil?) + @@Tc.track_metric "LastProcessedContainerInventoryCount", properties["ContainerCount"], + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => @@CustomProperties + @@Tc.flush + $log.info("AppInsights Container Count Telemetry sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomMetric - error: #{errorStr}") + end + end - def sendCustomEvent(eventName, properties) - begin - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility() - end - if !(@@Tc.nil?) - @@Tc.track_event eventName, :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Custom Event #{eventName} sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") - end + def sendCustomEvent(eventName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? 
+ initializeUtility() + end + telemetryProps = {} + # add common dimensions + @@CustomProperties.each { |k, v| telemetryProps[k] = v } + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each { |k, v| telemetryProps[k] = v } + end + if !(@@Tc.nil?) + @@Tc.track_event eventName, :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights Custom Event #{eventName} sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendCustomEvent - error: #{errorStr}") + end + end - def sendExceptionTelemetry(errorStr) - begin - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility() - elsif @@CustomProperties['DockerVersion'].nil? - getDockerInfo() - end - if !(@@Tc.nil?) - @@Tc.track_exception errorStr , :properties => @@CustomProperties - @@Tc.flush - $log.info("AppInsights Exception Telemetry sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") - end + def sendExceptionTelemetry(errorStr, properties = nil) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility() + elsif @@CustomProperties["DockerVersion"].nil? + getDockerInfo() + end + telemetryProps = {} + # add common dimensions + @@CustomProperties.each { |k, v| telemetryProps[k] = v } + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each { |k, v| telemetryProps[k] = v } + end + if !(@@Tc.nil?) + @@Tc.track_exception errorStr, :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights Exception Telemetry sent successfully") end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendExceptionTelemetry - error: #{errorStr}") + end + end - #Method to send heartbeat and container inventory count - def sendTelemetry(pluginName, properties) - begin - if @@CustomProperties.empty? || @@CustomProperties.nil? 
- initializeUtility() - elsif @@CustomProperties['DockerVersion'].nil? - getDockerInfo() - end - @@CustomProperties['Computer'] = properties['Computer'] - sendHeartBeatEvent(pluginName) - sendLastProcessedContainerInventoryCountMetric(pluginName, properties) - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") - end + #Method to send heartbeat and container inventory count + def sendTelemetry(pluginName, properties) + begin + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility() + elsif @@CustomProperties["DockerVersion"].nil? + getDockerInfo() end + @@CustomProperties["Computer"] = properties["Computer"] + sendHeartBeatEvent(pluginName) + sendLastProcessedContainerInventoryCountMetric(pluginName, properties) + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendTelemetry - error: #{errorStr}") + end + end - #Method to send metric. It will merge passed-in properties with common custom properties - def sendMetricTelemetry(metricName, metricValue, properties) - begin - if (metricName.empty? || metricName.nil?) - $log.warn("SendMetricTelemetry: metricName is missing") - return - end - if @@CustomProperties.empty? || @@CustomProperties.nil? - initializeUtility() - elsif @@CustomProperties['DockerVersion'].nil? - getDockerInfo() - end - telemetryProps = {} - telemetryProps["Computer"] = @@hostName - # add common dimensions - @@CustomProperties.each{ |k,v| telemetryProps[k]=v} - # add passed-in dimensions if any - if (!properties.nil? && !properties.empty?) - properties.each{ |k,v| telemetryProps[k]=v} - end - if !(@@Tc.nil?) 
- @@Tc.track_metric metricName, metricValue, - :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, - :properties => telemetryProps - @@Tc.flush - $log.info("AppInsights metric Telemetry #{metricName} sent successfully") - end - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") - end + #Method to send metric. It will merge passed-in properties with common custom properties + def sendMetricTelemetry(metricName, metricValue, properties) + begin + if (metricName.empty? || metricName.nil?) + $log.warn("SendMetricTelemetry: metricName is missing") + return end + if @@CustomProperties.empty? || @@CustomProperties.nil? + initializeUtility() + elsif @@CustomProperties["DockerVersion"].nil? + getDockerInfo() + end + telemetryProps = {} + # add common dimensions + @@CustomProperties.each { |k, v| telemetryProps[k] = v } + # add passed-in dimensions if any + if (!properties.nil? && !properties.empty?) + properties.each { |k, v| telemetryProps[k] = v } + end + if !(@@Tc.nil?) 
+ @@Tc.track_metric metricName, metricValue, + :kind => ApplicationInsights::Channel::Contracts::DataPointType::MEASUREMENT, + :properties => telemetryProps + @@Tc.flush + $log.info("AppInsights metric Telemetry #{metricName} sent successfully") + end + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: sendMetricTelemetry - error: #{errorStr}") + end + end - def getWorkspaceId() - begin - adminConf = {} - confFile = File.open(@OmsAdminFilePath, "r") - confFile.each_line do |line| - splitStrings = line.split('=') - adminConf[splitStrings[0]] = splitStrings[1] - end - workspaceId = adminConf['WORKSPACE_ID'] - return workspaceId - rescue => errorStr - $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") - end + def getWorkspaceId() + begin + adminConf = {} + confFile = File.open(@OmsAdminFilePath, "r") + confFile.each_line do |line| + splitStrings = line.split("=") + adminConf[splitStrings[0]] = splitStrings[1] end + workspaceId = adminConf["WORKSPACE_ID"] + return workspaceId + rescue => errorStr + $log.warn("Exception in AppInsightsUtility: getWorkspaceId - error: #{errorStr}") + end end -end \ No newline at end of file + end +end diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 3c36775af..8b4fd9fcf 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -2,424 +2,628 @@ # frozen_string_literal: true class CAdvisorMetricsAPIClient - - require 'json' - require 'logger' - require 'net/http' - require 'net/https' - require 'uri' - require 'date' - - require_relative 'oms_common' - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' - - @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" - @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M - @@rxBytesLast = nil - @@rxBytesTimeLast = nil - @@txBytesLast = nil - 
@@txBytesTimeLast = nil - @@nodeCpuUsageNanoSecondsLast = nil - @@nodeCpuUsageNanoSecondsTimeLast = nil - @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i - - - def initialize + require "json" + require "logger" + require "net/http" + require "net/https" + require "uri" + require "date" + + require_relative "oms_common" + require_relative "KubernetesApiClient" + require_relative "ApplicationInsightsUtility" + + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" + @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + # @@rxBytesLast = nil + # @@rxBytesTimeLast = nil + # @@txBytesLast = nil + # @@txBytesTimeLast = nil + @@nodeCpuUsageNanoSecondsLast = nil + @@nodeCpuUsageNanoSecondsTimeLast = nil + @@winNodeCpuUsageNanoSecondsLast = {} + @@winNodeCpuUsageNanoSecondsTimeLast = {} + @@winContainerCpuUsageNanoSecondsLast = {} + @@winContainerCpuUsageNanoSecondsTimeLast = {} + @@winContainerPrevMetricRate = {} + @@linuxNodePrevMetricRate = nil + @@winNodePrevMetricRate = {} + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + + #Containers a hash of node name and the last time telemetry was sent for this node + @@nodeTelemetryTimeTracker = {} + + # Keeping track of containers so that can delete the container from the container cpu cache when the container is deleted + # as a part of the cleanup routine + @@winContainerIdCache = [] + + def initialize + end + + class << self + def getSummaryStatsFromCAdvisor(winNode) + headers = {} + response = nil + @Log.info "Getting CAdvisor Uri" + begin + cAdvisorUri = getCAdvisorUri(winNode) + if !cAdvisorUri.nil? 
+ uri = URI.parse(cAdvisorUri) + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = false + + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end + rescue => error + @Log.warn("CAdvisor api request failed: #{error}") + telemetryProps = {} + telemetryProps["Computer"] = winNode["Hostname"] + ApplicationInsightsUtility.sendExceptionTelemetry(error, telemetryProps) + end + return response + end + + def getCAdvisorUri(winNode) + begin + defaultHost = "http://localhost:10255" + relativeUri = "/stats/summary" + if !winNode.nil? + nodeIP = winNode["InternalIP"] + else + nodeIP = ENV["NODE_IP"] + end + if !nodeIP.nil? + @Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") + return "http://#{nodeIP}:10255" + relativeUri + else + @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost + relativeUri} ") + if !winNode.nil? + return nil + else + return defaultHost + relativeUri + end + end + end + end + + def getMetrics(winNode = nil) + metricDataItems = [] + begin + if !winNode.nil? + hostName = winNode["Hostname"] + operatingSystem = "Windows" + else + hostName = (OMS::Common.get_hostname) + operatingSystem = "Linux" + end + cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) + if !cAdvisorStats.nil? + metricInfo = JSON.parse(cAdvisorStats.body) + end + if !metricInfo.nil? 
+ metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes")) + metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) + + if operatingSystem == "Linux" + metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", "cpuUsageNanoCores")) + metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) + metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) + elsif operatingSystem == "Windows" + containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", "cpuUsageNanoCores") + if containerCpuUsageNanoSecondsRate && !containerCpuUsageNanoSecondsRate.empty? && !containerCpuUsageNanoSecondsRate.nil? + metricDataItems.concat(containerCpuUsageNanoSecondsRate) end - - class << self - def getSummaryStatsFromCAdvisor() - headers = {} - response = nil - @Log.info 'Getting CAdvisor Uri' - begin - cAdvisorUri = getCAdvisorUri() - if !cAdvisorUri.nil? - uri = URI.parse(cAdvisorUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = false - - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" - end - rescue => error - @Log.warn("CAdvisor api request failed: #{error}") - end - return response - end - - def getCAdvisorUri() - begin - defaultHost = "http://localhost:10255" - relativeUri = "/stats/summary" - nodeIP = ENV['NODE_IP'] - if !nodeIP.nil? - @Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") - return "http://#{nodeIP}:10255" + relativeUri - else - @Log.warn ("NODE_IP environment variable not set. 
Using default as : #{defaultHost + relativeUri} ") - return defaultHost + relativeUri - end - end - end - - def getMetrics() - metricDataItems = [] - begin - hostName = (OMS::Common.get_hostname) - metricInfo = JSON.parse(getSummaryStatsFromCAdvisor().body) - metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores","cpuUsageNanoCores")) - metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", "memoryWorkingSetBytes")) - metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", "memoryRssBytes")) - metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch")) - - cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores") - if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? - metricDataItems.push(cpuUsageNanoSecondsRate) - end - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", "memoryRssBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) - metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "txBytes", "networkTxBytes")) - metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch")) - - networkRxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "rxBytes", "networkRxBytesPerSec") - if networkRxRate && !networkRxRate.empty? && !networkRxRate.nil? - metricDataItems.push(networkRxRate) - end - networkTxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "txBytes", "networkTxBytesPerSec") - if networkTxRate && !networkTxRate.empty? && !networkTxRate.nil? 
- metricDataItems.push(networkTxRate) - end - - - rescue => error - @Log.warn("getContainerMetrics failed: #{error}") - return metricDataItems - end - return metricDataItems - end + end - def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) - metricItems = [] - clusterId = KubernetesApiClient.getClusterId - timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - begin - metricInfo = metricJSON - metricInfo['pods'].each do |pod| - podUid = pod['podRef']['uid'] - podName = pod['podRef']['name'] - podNamespace = pod['podRef']['namespace'] - - if (!pod['containers'].nil?) - pod['containers'].each do |container| - #cpu metric - containerName = container['name'] - metricValue = container['cpu'][cpuMetricNameToCollect] - metricTime = container['cpu']['time'] - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #Telemetry about agent performance - begin - # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers - # cadvisor does not have pod/container metadata. 
so would need more work to cache as pv & use - if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("cpuUsageNanoCores")) - - if (timeDifferenceInMinutes >= 10) - telemetryProps = {} - telemetryProps['PodName'] = podName - telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) - end - end - rescue => errorStr - $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") - end - end - end - end - # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) - if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("cpuUsageNanoCores")) - @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i - end - rescue => error - @Log.warn("getcontainerCpuMetricItems failed: #{error} for metric #{cpuMetricNameToCollect}") - return metricItems - end - return metricItems - end + cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", "cpuUsageNanoCores", operatingSystem) + if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil? 
+ metricDataItems.push(cpuUsageNanoSecondsRate) + end + metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", "memoryWorkingSetBytes")) - def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) - metricItems = [] - clusterId = KubernetesApiClient.getClusterId - timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - begin - metricInfo = metricJSON - metricInfo['pods'].each do |pod| - podUid = pod['podRef']['uid'] - podName = pod['podRef']['name'] - podNamespace = pod['podRef']['namespace'] - if (!pod['containers'].nil?) - pod['containers'].each do |container| - containerName = container['name'] - metricValue = container['memory'][memoryMetricNameToCollect] - metricTime = container['memory']['time'] - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #Telemetry about agent performance - begin - # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers - # cadvisor does not have pod/container metadata. 
so would need more work to cache as pv & use - if (podName.downcase.start_with?('omsagent-') && podNamespace.eql?("kube-system") && containerName.downcase.start_with?('omsagent') && metricNametoReturn.eql?("memoryRssBytes")) - if (timeDifferenceInMinutes >= 10) - telemetryProps = {} - telemetryProps['PodName'] = podName - telemetryProps['ContainerName'] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) - end - end - rescue => errorStr - $log.warn("Exception while generating Telemetry from getcontainerMemoryMetricItems failed: #{errorStr} for metric #{memoryMetricNameToCollect}") - end - end - end - end - # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) - if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("memoryRssBytes")) - @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i - end - rescue => error - @Log.warn("getcontainerMemoryMetricItems failed: #{error} for metric #{memoryMetricNameToCollect}") - @Log.warn metricJSON - return metricItems - end - return metricItems - end + metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch")) + + # Disabling networkRxRate and networkTxRate since we dont use it as of now. + #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes")) + #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "txBytes", "networkTxBytes")) + # networkRxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "rxBytes", "networkRxBytesPerSec") + # if networkRxRate && !networkRxRate.empty? && !networkRxRate.nil? + # metricDataItems.push(networkRxRate) + # end + # networkTxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "txBytes", "networkTxBytesPerSec") + # if networkTxRate && !networkTxRate.empty? && !networkTxRate.nil? 
+ # metricDataItems.push(networkTxRate) + # end + else + @Log.warn("Couldn't get metric information for host: #{hostName}") + end + rescue => error + @Log.warn("getContainerMetrics failed: #{error}") + return metricDataItems + end + return metricDataItems + end + + def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + + if (!pod["containers"].nil?) + pod["containers"].each do |container| + #cpu metric + containerName = container["name"] + metricValue = container["cpu"][cpuMetricNameToCollect] + metricTime = container["cpu"]["time"] + metricItem = {} + metricItem["DataItems"] = [] - def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) - metricItem = {} - clusterId = KubernetesApiClient.getClusterId - begin - metricInfo = metricJSON - node = metricInfo['node'] - nodeName = node['nodeName'] - - - metricValue = node[metricCategory][metricNameToCollect] - metricTime = node[metricCategory]['time'] - - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + nodeName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - - rescue => error - @Log.warn("getNodeMetricItem failed: #{error} for metric #{metricNameToCollect}") - @Log.warn metricJSON - return 
metricItem - end - return metricItem + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. so would need more work to cache as pv & use + if (podName.downcase.start_with?("omsagent-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricNametoReturn.eql?("cpuUsageNanoCores")) + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + telemetryProps["Computer"] = hostName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") + end + end + end + end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("cpuUsageNanoCores")) + @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i + end + rescue => error + @Log.warn("getcontainerCpuMetricItems failed: #{error} for metric #{cpuMetricNameToCollect}") + return metricItems + end + return metricItems + end + + def clearDeletedWinContainersFromCache() + begin + winCpuUsageNanoSecondsKeys = @@winContainerCpuUsageNanoSecondsLast.keys 
+ winCpuUsageNanoSecondsTimeKeys = @@winContainerCpuUsageNanoSecondsTimeLast.keys + + # Find the container ids to be deleted from cache + winContainersToBeCleared = winCpuUsageNanoSecondsKeys - @@winContainerIdCache + if winContainersToBeCleared.length > 0 + @Log.warn "Stale containers found in cache, clearing...: #{winContainersToBeCleared}" + end + winContainersToBeCleared.each do |containerId| + @@winContainerCpuUsageNanoSecondsLast.delete(containerId) + @@winContainerCpuUsageNanoSecondsTimeLast.delete(containerId) + end + rescue => errorStr + @Log.warn("clearDeletedWinContainersFromCache failed: #{errorStr}") + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def resetWinContainerIdCache + @@winContainerIdCache = [] + end + + # usageNanoCores doesnt exist for windows nodes. Hence need to compute this from usageCoreNanoSeconds + def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryCpuMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + @Log.warn "in host: #{hostName}" + begin + metricInfo = metricJSON + containerCount = 0 + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + + if (!pod["containers"].nil?) 
+ pod["containers"].each do |container| + #cpu metric + containerCount += 1 + containerName = container["name"] + metricValue = container["cpu"][cpuMetricNameToCollect] + metricTime = container["cpu"]["time"] + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn - def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) - metricItem = {} - clusterId = KubernetesApiClient.getClusterId - begin - - metricInfo = metricJSON - node = metricInfo['node'] - nodeName = node['nodeName'] - - metricValue = node[metricCategory][metricNameToCollect] - metricTime = node[metricCategory]['time'] - - if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds" ) - @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") - return nil - elsif metricNameToCollect == "rxBytes" - if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? || @@rxBytesLast > metricValue #when kubelet is restarted the last condition will be true - @@rxBytesLast = metricValue - @@rxBytesTimeLast = metricTime - return nil - else - metricRateValue = ((metricValue - @@rxBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) - @@rxBytesLast = metricValue - @@rxBytesTimeLast = metricTime - metricValue = metricRateValue - end - elsif metricNameToCollect == "txBytes" - if @@txBytesLast.nil? || @@txBytesTimeLast.nil? 
|| @@txBytesLast > metricValue #when kubelet is restarted the last condition will be true - @@txBytesLast = metricValue - @@txBytesTimeLast = metricTime - return nil - else - metricRateValue = ((metricValue - @@txBytesLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) - @@txBytesLast = metricValue - @@txBytesTimeLast = metricTime - metricValue = metricRateValue - end - else - if @@nodeCpuUsageNanoSecondsLast.nil? || @@nodeCpuUsageNanoSecondsTimeLast.nil? || @@nodeCpuUsageNanoSecondsLast > metricValue #when kubelet is restarted the last condition will be true - @@nodeCpuUsageNanoSecondsLast = metricValue - @@nodeCpuUsageNanoSecondsTimeLast = metricTime - return nil - else - metricRateValue = ((metricValue - @@nodeCpuUsageNanoSecondsLast) * 1.0)/(DateTime.parse(metricTime).to_time - DateTime.parse(@@nodeCpuUsageNanoSecondsTimeLast).to_time) - @@nodeCpuUsageNanoSecondsLast = metricValue - @@nodeCpuUsageNanoSecondsTimeLast = metricTime - metricValue = metricRateValue - end - end - - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + nodeName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - - rescue => error - @Log.warn("getNodeMetricItemRate failed: #{error} for metric #{metricNameToCollect}") - @Log.warn metricJSON - return nil - end - return metricItem + containerId = podUid + "/" + containerName + # Adding the containers to the winContainerIdCache so that it can be used by the cleanup routine + # to clear the delted containers every 5 minutes + @@winContainerIdCache.push(containerId) + if @@winContainerCpuUsageNanoSecondsLast[containerId].nil? 
|| @@winContainerCpuUsageNanoSecondsTimeLast[containerId].nil? || @@winContainerCpuUsageNanoSecondsLast[containerId] > metricValue #when kubelet is restarted the last condition will be true + @@winContainerCpuUsageNanoSecondsLast[containerId] = metricValue + @@winContainerCpuUsageNanoSecondsTimeLast[containerId] = metricTime + next + else + timeDifference = DateTime.parse(metricTime).to_time - DateTime.parse(@@winContainerCpuUsageNanoSecondsTimeLast[containerId]).to_time + containerCpuUsageDifference = metricValue - @@winContainerCpuUsageNanoSecondsLast[containerId] + # containerCpuUsageDifference check is added to make sure we report non zero values when cadvisor returns same values for subsequent calls + if timeDifference != 0 && containerCpuUsageDifference != 0 + metricRateValue = (containerCpuUsageDifference * 1.0) / timeDifference + else + @Log.info "container - cpu usage difference / time difference is 0, hence using previous cached value" + if !@@winContainerPrevMetricRate[containerId].nil? 
+ metricRateValue = @@winContainerPrevMetricRate[containerId] + else + # This can happen when the metric value returns same values for subsequent calls when the plugin first starts + metricRateValue = 0 + end end + @@winContainerCpuUsageNanoSecondsLast[containerId] = metricValue + @@winContainerCpuUsageNanoSecondsTimeLast[containerId] = metricTime + metricValue = metricRateValue + @@winContainerPrevMetricRate[containerId] = metricRateValue + end - def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) - metricItem = {} - clusterId = KubernetesApiClient.getClusterId - - begin - metricInfo = metricJSON - node = metricInfo['node'] - nodeName = node['nodeName'] - - - metricValue = node['startTime'] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + nodeName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - #Read it from /proc/uptime - metricCollections['Value'] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - - rescue => error - @Log.warn("getNodeLastRebootTimeMetric failed: #{error} ") - @Log.warn metricJSON - return metricItem - end - return metricItem + metricCollections["Value"] = metricValue + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + end + end + end + #Sending ContainerInventoryTelemetry from replicaset for telemetry purposes + if @@nodeTelemetryTimeTracker[hostName].nil? 
+ @@nodeTelemetryTimeTracker[hostName] = DateTime.now.to_time.to_i + else + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker[hostName]).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + @@nodeTelemetryTimeTracker[hostName] = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties["Computer"] = hostName + telemetryProperties["ContainerCount"] = containerCount + # Hardcoding the event to ContainerInventory hearbeat event since the telemetry is pivoted off of this event. + @Log.info "sending container inventory heartbeat telemetry" + ApplicationInsightsUtility.sendCustomEvent("ContainerInventoryHeartBeatEvent", telemetryProperties) + end + end + rescue => error + @Log.warn("getcontainerCpuMetricItemRate failed: #{error} for metric #{cpuMetricNameToCollect}") + return metricItems + end + return metricItems + end + + def getContainerMemoryMetricItems(metricJSON, hostName, memoryMetricNameToCollect, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + timeDifference = (DateTime.now.to_time.to_i - @@telemetryMemoryMetricTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + podName = pod["podRef"]["name"] + podNamespace = pod["podRef"]["namespace"] + if (!pod["containers"].nil?) 
+ pod["containers"].each do |container| + containerName = container["name"] + metricValue = container["memory"][memoryMetricNameToCollect] + metricTime = container["memory"]["time"] + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #Telemetry about agent performance + begin + # we can only do this much now. Ideally would like to use the docker image repository to find our pods/containers + # cadvisor does not have pod/container metadata. so would need more work to cache as pv & use + if (podName.downcase.start_with?("omsagent-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricNametoReturn.eql?("memoryRssBytes")) + if (timeDifferenceInMinutes >= 10) + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + telemetryProps["Computer"] = hostName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerMemoryMetricItems failed: #{errorStr} for metric #{memoryMetricNameToCollect}") + end + end + end + end + # reset time outside pod iterator as we use one timer per metric for 2 pods (ds & rs) + if (timeDifferenceInMinutes >= 10 && metricNametoReturn.eql?("memoryRssBytes")) + @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i + end + rescue => error + @Log.warn("getcontainerMemoryMetricItems failed: #{error} for metric 
#{memoryMetricNameToCollect}") + @Log.warn metricJSON + return metricItems + end + return metricItems + end + + def getNodeMetricItem(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn) + metricItem = {} + clusterId = KubernetesApiClient.getClusterId + begin + metricInfo = metricJSON + node = metricInfo["node"] + nodeName = node["nodeName"] + + if !node[metricCategory].nil? + metricValue = node[metricCategory][metricNameToCollect] + metricTime = node[metricCategory]["time"] + + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + end + rescue => error + @Log.warn("getNodeMetricItem failed: #{error} for metric #{metricNameToCollect}") + @Log.warn metricJSON + return metricItem + end + return metricItem + end + + def getNodeMetricItemRate(metricJSON, hostName, metricCategory, metricNameToCollect, metricNametoReturn, operatingSystem) + metricItem = {} + clusterId = KubernetesApiClient.getClusterId + begin + metricInfo = metricJSON + node = metricInfo["node"] + nodeName = node["nodeName"] - def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn) - metricItems = [] - clusterId = KubernetesApiClient.getClusterId - currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - begin - metricInfo = metricJSON - metricInfo['pods'].each do |pod| - podUid = pod['podRef']['uid'] - if (!pod['containers'].nil?) 
- pod['containers'].each do |container| - containerName = container['name'] - metricValue = container['startTime'] - metricTime = currentTime - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = hostName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = DateTime.parse(metricValue).to_time.to_i - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - end - end - end - rescue => error - @Log.warn("getContainerStartTimeMetric failed: #{error} for metric #{metricNametoReturn}") - @Log.warn metricJSON - return metricItems - end - return metricItems + if !node[metricCategory].nil? + metricValue = node[metricCategory][metricNameToCollect] + metricTime = node[metricCategory]["time"] + + # if !(metricNameToCollect == "rxBytes" || metricNameToCollect == "txBytes" || metricNameToCollect == "usageCoreNanoSeconds") + # @Log.warn("getNodeMetricItemRate : rateMetric is supported only for rxBytes, txBytes & usageCoreNanoSeconds and not for #{metricNameToCollect}") + if !(metricNameToCollect == "usageCoreNanoSeconds") + @Log.warn("getNodeMetricItemRate : rateMetric is supported only for usageCoreNanoSeconds and not for #{metricNameToCollect}") + return nil + # elsif metricNameToCollect == "rxBytes" + # if @@rxBytesLast.nil? || @@rxBytesTimeLast.nil? 
|| @@rxBytesLast > metricValue #when kubelet is restarted the last condition will be true + # @@rxBytesLast = metricValue + # @@rxBytesTimeLast = metricTime + # return nil + # else + # metricRateValue = ((metricValue - @@rxBytesLast) * 1.0) / (DateTime.parse(metricTime).to_time - DateTime.parse(@@rxBytesTimeLast).to_time) + # @@rxBytesLast = metricValue + # @@rxBytesTimeLast = metricTime + # metricValue = metricRateValue + # end + # elsif metricNameToCollect == "txBytes" + # if @@txBytesLast.nil? || @@txBytesTimeLast.nil? || @@txBytesLast > metricValue #when kubelet is restarted the last condition will be true + # @@txBytesLast = metricValue + # @@txBytesTimeLast = metricTime + # return nil + # else + # metricRateValue = ((metricValue - @@txBytesLast) * 1.0) / (DateTime.parse(metricTime).to_time - DateTime.parse(@@txBytesTimeLast).to_time) + # @@txBytesLast = metricValue + # @@txBytesTimeLast = metricTime + # metricValue = metricRateValue + # end + else + if operatingSystem == "Linux" + if @@nodeCpuUsageNanoSecondsLast.nil? || @@nodeCpuUsageNanoSecondsTimeLast.nil? || @@nodeCpuUsageNanoSecondsLast > metricValue #when kubelet is restarted the last condition will be true + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + return nil + else + timeDifference = DateTime.parse(metricTime).to_time - DateTime.parse(@@nodeCpuUsageNanoSecondsTimeLast).to_time + nodeCpuUsageDifference = metricValue - @@nodeCpuUsageNanoSecondsLast + # nodeCpuUsageDifference check is added to make sure we report non zero values when cadvisor returns same values for subsequent calls + if timeDifference != 0 && nodeCpuUsageDifference != 0 + metricRateValue = (nodeCpuUsageDifference * 1.0) / timeDifference + else + @Log.info "linux node - cpu usage difference / time difference is 0, hence using previous cached value" + if !@@linuxNodePrevMetricRate.nil? 
+ metricRateValue = @@linuxNodePrevMetricRate + else + # This can happen when the metric value returns same values for subsequent calls when the plugin first starts + metricRateValue = 0 + end + end + @@nodeCpuUsageNanoSecondsLast = metricValue + @@nodeCpuUsageNanoSecondsTimeLast = metricTime + @@linuxNodePrevMetricRate = metricRateValue + metricValue = metricRateValue + end + elsif operatingSystem == "Windows" + # Using the hash for windows nodes since this is running in replica set and there can be multiple nodes + if @@winNodeCpuUsageNanoSecondsLast[hostName].nil? || @@winNodeCpuUsageNanoSecondsTimeLast[hostName].nil? || @@winNodeCpuUsageNanoSecondsLast[hostName] > metricValue #when kubelet is restarted the last condition will be true + @@winNodeCpuUsageNanoSecondsLast[hostName] = metricValue + @@winNodeCpuUsageNanoSecondsTimeLast[hostName] = metricTime + return nil + else + timeDifference = DateTime.parse(metricTime).to_time - DateTime.parse(@@winNodeCpuUsageNanoSecondsTimeLast[hostName]).to_time + nodeCpuUsageDifference = metricValue - @@winNodeCpuUsageNanoSecondsLast[hostName] + # nodeCpuUsageDifference check is added to make sure we report non zero values when cadvisor returns same values for subsequent calls + if timeDifference != 0 && nodeCpuUsageDifference != 0 + metricRateValue = (nodeCpuUsageDifference * 1.0) / timeDifference + else + @Log.info "windows node - cpu usage difference / time difference is 0, hence using previous cached value" + if !@@winNodePrevMetricRate[hostName].nil? 
+ metricRateValue = @@winNodePrevMetricRate[hostName] + else + # This can happen when the metric value returns same values for subsequent calls when the plugin first starts + metricRateValue = 0 + end end + @@winNodeCpuUsageNanoSecondsLast[hostName] = metricValue + @@winNodeCpuUsageNanoSecondsTimeLast[hostName] = metricTime + @@winNodePrevMetricRate[hostName] = metricRateValue + metricValue = metricRateValue + end + end + end + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + end + rescue => error + @Log.warn("getNodeMetricItemRate failed: #{error} for metric #{metricNameToCollect}") + @Log.warn metricJSON + return nil + end + return metricItem + end + + def getNodeLastRebootTimeMetric(metricJSON, hostName, metricNametoReturn) + metricItem = {} + clusterId = KubernetesApiClient.getClusterId + + begin + metricInfo = metricJSON + node = metricInfo["node"] + nodeName = node["nodeName"] + + metricValue = node["startTime"] + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + nodeName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + #Read it from /proc/uptime + metricCollections["Value"] = DateTime.parse(metricTime).to_time.to_i - IO.read("/proc/uptime").split[0].to_f + + metricProps["Collections"].push(metricCollections) + 
metricItem["DataItems"].push(metricProps) + rescue => error + @Log.warn("getNodeLastRebootTimeMetric failed: #{error} ") + @Log.warn metricJSON + return metricItem + end + return metricItem + end + + def getContainerStartTimeMetricItems(metricJSON, hostName, metricNametoReturn) + metricItems = [] + clusterId = KubernetesApiClient.getClusterId + currentTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + begin + metricInfo = metricJSON + metricInfo["pods"].each do |pod| + podUid = pod["podRef"]["uid"] + if (!pod["containers"].nil?) + pod["containers"].each do |container| + containerName = container["name"] + metricValue = container["startTime"] + metricTime = currentTime + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = hostName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = DateTime.parse(metricValue).to_time.to_i + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) end + end end + rescue => error + @Log.warn("getContainerStartTimeMetric failed: #{error} for metric #{metricNametoReturn}") + @Log.warn metricJSON + return metricItems + end + return metricItems + end + end +end diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index a1e143b15..4ed85025f 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -2,474 +2,516 @@ # frozen_string_literal: true class KubernetesApiClient + require "json" + require "logger" + require "net/http" + require "net/https" + require "uri" + require "time" - require 'json' - require 'logger' - require 'net/http' - require 'net/https' - require 'uri' - 
require 'time' - - require_relative 'oms_common' - - @@ApiVersion = "v1" - @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - @@ClusterName = nil - @@ClusterId = nil - @@IsNodeMaster = nil - #@@IsValidRunningNode = nil - #@@IsLinuxCluster = nil - @@KubeSystemNamespace = "kube-system" - @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt" - @Log = Logger.new(@LogPath, 2, 10*1048576) #keep last 2 files, max log file size = 10M - @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" - @@TokenStr = nil - @@NodeMetrics = Hash.new - - def initialize + require_relative "oms_common" + + @@ApiVersion = "v1" + @@CaFile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + @@ClusterName = nil + @@ClusterId = nil + @@IsNodeMaster = nil + #@@IsValidRunningNode = nil + #@@IsLinuxCluster = nil + @@KubeSystemNamespace = "kube-system" + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt" + @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M + @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" + @@TokenStr = nil + @@NodeMetrics = Hash.new + @@WinNodeArray = [] + + def initialize + end + + class << self + def getKubeResourceInfo(resource) + headers = {} + response = nil + @Log.info "Getting Kube resource" + @Log.info resource + begin + resourceUri = getResourceUri(resource) + if !resourceUri.nil? 
+ uri = URI.parse(resourceUri) + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + if !File.exist?(@@CaFile) + raise "#{@@CaFile} doesnt exist" + else + http.ca_file = @@CaFile if File.exist?(@@CaFile) + end + http.verify_mode = OpenSSL::SSL::VERIFY_PEER + + kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) + kubeApiRequest["Authorization"] = "Bearer " + getTokenStr + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" + response = http.request(kubeApiRequest) + @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" end + rescue => error + @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") + end + if (response.body.empty?) + @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") + end + return response + end - class << self - def getKubeResourceInfo(resource) - headers = {} - response = nil - @Log.info 'Getting Kube resource' - @Log.info resource - begin - resourceUri = getResourceUri(resource) - if !resourceUri.nil? 
- uri = URI.parse(resourceUri) - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - if !File.exist?(@@CaFile) - raise "#{@@CaFile} doesnt exist" - else - http.ca_file = @@CaFile if File.exist?(@@CaFile) - end - http.verify_mode = OpenSSL::SSL::VERIFY_PEER - - kubeApiRequest = Net::HTTP::Get.new(uri.request_uri) - kubeApiRequest['Authorization'] = "Bearer " + getTokenStr - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Making request to #{uri.request_uri} @ #{Time.now.utc.iso8601}" - response = http.request(kubeApiRequest) - @Log.info "KubernetesAPIClient::getKubeResourceInfo : Got response of #{response.code} for #{uri.request_uri} @ #{Time.now.utc.iso8601}" - end - rescue => error - @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}") - end - if (response.body.empty?) - @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}") - end - return response - end + def getTokenStr + return @@TokenStr if !@@TokenStr.nil? + begin + if File.exist?(@@TokenFileName) && File.readable?(@@TokenFileName) + @@TokenStr = File.read(@@TokenFileName).strip + return @@TokenStr + else + @Log.warn("Unable to read token string from #{@@TokenFileName}: #{error}") + return nil + end + end + end - def getTokenStr - return @@TokenStr if !@@TokenStr.nil? 
- begin - if File.exist?(@@TokenFileName) && File.readable?(@@TokenFileName) - @@TokenStr = File.read(@@TokenFileName).strip - return @@TokenStr - else - @Log.warn("Unable to read token string from #{@@TokenFileName}: #{error}") - return nil - end - end - end + def getResourceUri(resource) + begin + if ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"] + return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/api/" + @@ApiVersion + "/" + resource + else + @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri") + return nil + end + end + end - def getResourceUri(resource) - begin - if ENV['KUBERNETES_SERVICE_HOST'] && ENV['KUBERNETES_PORT_443_TCP_PORT'] - return "https://#{ENV['KUBERNETES_SERVICE_HOST']}:#{ENV['KUBERNETES_PORT_443_TCP_PORT']}/api/" + @@ApiVersion + "/" + resource - else - @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV['KUBERNETES_SERVICE_HOST']} KUBERNETES_PORT_443_TCP_PORT: #{ENV['KUBERNETES_PORT_443_TCP_PORT']}. Unable to form resourceUri") - return nil - end + def getClusterName + return @@ClusterName if !@@ClusterName.nil? + @@ClusterName = "None" + begin + #try getting resource ID for aks + cluster = ENV["AKS_RESOURCE_ID"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterName = cluster.split("/").last + else + cluster = ENV["ACS_RESOURCE_NAME"] + if cluster && !cluster.nil? && !cluster.empty? 
+ @@ClusterName = cluster + else + kubesystemResourceUri = "namespaces/" + @@KubeSystemNamespace + "/pods" + @Log.info("KubernetesApiClient::getClusterName : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + @Log.info("KubernetesApiClient::getClusterName : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo["items"].each do |items| + if items["metadata"]["name"].include? "kube-controller-manager" + items["spec"]["containers"][0]["command"].each do |command| + if command.include? "--cluster-name" + @@ClusterName = command.split("=")[1] + end end + end end + end + end + rescue => error + @Log.warn("getClusterName failed: #{error}") + end + return @@ClusterName + end - def getClusterName - return @@ClusterName if !@@ClusterName.nil? - @@ClusterName = "None" - begin - #try getting resource ID for aks - cluster = ENV['AKS_RESOURCE_ID'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterName = cluster.split("/").last - else - cluster = ENV['ACS_RESOURCE_NAME'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterName = cluster - else - kubesystemResourceUri = "namespaces/" + @@KubeSystemNamespace + "/pods" - @Log.info("KubernetesApiClient::getClusterName : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - @Log.info("KubernetesApiClient::getClusterName : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo['items'].each do |items| - if items['metadata']['name'].include? "kube-controller-manager" - items['spec']['containers'][0]['command'].each do |command| - if command.include? "--cluster-name" - @@ClusterName = command.split('=')[1] - end - end - end - end - end - end - rescue => error - @Log.warn("getClusterName failed: #{error}") - end - return @@ClusterName - end + def getClusterId + return @@ClusterId if !@@ClusterId.nil? 
+ #By default initialize ClusterId to ClusterName. + # In ACS/On-prem, we need to figure out how we can generate ClusterId + @@ClusterId = getClusterName + begin + cluster = ENV["AKS_RESOURCE_ID"] + if cluster && !cluster.nil? && !cluster.empty? + @@ClusterId = cluster + end + rescue => error + @Log.warn("getClusterId failed: #{error}") + end + return @@ClusterId + end - def getClusterId - return @@ClusterId if !@@ClusterId.nil? - #By default initialize ClusterId to ClusterName. - # In ACS/On-prem, we need to figure out how we can generate ClusterId - @@ClusterId = getClusterName - begin - cluster = ENV['AKS_RESOURCE_ID'] - if cluster && !cluster.nil? && !cluster.empty? - @@ClusterId = cluster - end - rescue => error - @Log.warn("getClusterId failed: #{error}") - end - return @@ClusterId + def isNodeMaster + return @@IsNodeMaster if !@@IsNodeMaster.nil? + @@IsNodeMaster = false + begin + @Log.info("KubernetesApiClient::isNodeMaster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + allNodesInfo = JSON.parse(getKubeResourceInfo("nodes").body) + @Log.info("KubernetesApiClient::isNodeMaster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + if !allNodesInfo.nil? && !allNodesInfo.empty? + thisNodeName = OMS::Common.get_hostname + allNodesInfo["items"].each do |item| + if item["metadata"]["name"].casecmp(thisNodeName) == 0 + if item["metadata"]["labels"]["kubernetes.io/role"].to_s.include?("master") || item["metadata"]["labels"]["role"].to_s.include?("master") + @@IsNodeMaster = true + end + break end + end + end + rescue => error + @Log.warn("KubernetesApiClient::isNodeMaster : node role request failed: #{error}") + end - def isNodeMaster - return @@IsNodeMaster if !@@IsNodeMaster.nil? 
- @@IsNodeMaster = false - begin - @Log.info("KubernetesApiClient::isNodeMaster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) - @Log.info("KubernetesApiClient::isNodeMaster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - if !allNodesInfo.nil? && !allNodesInfo.empty? - thisNodeName = OMS::Common.get_hostname - allNodesInfo['items'].each do |item| - if item['metadata']['name'].casecmp(thisNodeName) == 0 - if item['metadata']['labels']["kubernetes.io/role"].to_s.include?("master") || item['metadata']['labels']["role"].to_s.include?("master") - @@IsNodeMaster = true - end - break - end - end - end - rescue => error - @Log.warn("KubernetesApiClient::isNodeMaster : node role request failed: #{error}") - end - - return @@IsNodeMaster - end + return @@IsNodeMaster + end - #def isValidRunningNode - # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? - # @@IsValidRunningNode = false - # begin - # thisNodeName = OMS::Common.get_hostname - # if isLinuxCluster - # # Run on agent node [0] - # @@IsValidRunningNode = !isNodeMaster && thisNodeName.to_s.split('-').last == '0' - # else - # # Run on master node [0] - # @@IsValidRunningNode = isNodeMaster && thisNodeName.to_s.split('-').last == '0' - # end - # rescue => error - # @Log.warn("Checking Node Type failed: #{error}") - # end - # if(@@IsValidRunningNode == true) - # @Log.info("Electing current node to talk to k8 api") - # else - # @Log.info("Not Electing current node to talk to k8 api") - # end - # return @@IsValidRunningNode - #end - - #def isLinuxCluster - # return @@IsLinuxCluster if !@@IsLinuxCluster.nil? 
- # @@IsLinuxCluster = true - # begin - # @Log.info("KubernetesApiClient::isLinuxCluster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - # allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) - # @Log.info("KubernetesApiClient::isLinuxCluster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - # if !allNodesInfo.nil? && !allNodesInfo.empty? - # allNodesInfo['items'].each do |item| - # if !(item['status']['nodeInfo']['operatingSystem'].casecmp('linux') == 0) - # @@IsLinuxCluster = false - # break - # end - # end - # end - # rescue => error - # @Log.warn("KubernetesApiClient::isLinuxCluster : node role request failed: #{error}") - # end - # return @@IsLinuxCluster - #end - - # returns an arry of pods (json) - def getPods(namespace) - pods = [] - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods" - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - podInfo['items'].each do |items| - pods.push items - end - rescue => error - @Log.warn("List pods request failed: #{error}") - end - return pods - end + #def isValidRunningNode + # return @@IsValidRunningNode if !@@IsValidRunningNode.nil? + # @@IsValidRunningNode = false + # begin + # thisNodeName = OMS::Common.get_hostname + # if isLinuxCluster + # # Run on agent node [0] + # @@IsValidRunningNode = !isNodeMaster && thisNodeName.to_s.split('-').last == '0' + # else + # # Run on master node [0] + # @@IsValidRunningNode = isNodeMaster && thisNodeName.to_s.split('-').last == '0' + # end + # rescue => error + # @Log.warn("Checking Node Type failed: #{error}") + # end + # if(@@IsValidRunningNode == true) + # @Log.info("Electing current node to talk to k8 api") + # else + # @Log.info("Not Electing current node to talk to k8 api") + # end + # return @@IsValidRunningNode + #end + + #def isLinuxCluster + # return @@IsLinuxCluster if !@@IsLinuxCluster.nil? 
+ # @@IsLinuxCluster = true + # begin + # @Log.info("KubernetesApiClient::isLinuxCluster : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + # allNodesInfo = JSON.parse(getKubeResourceInfo('nodes').body) + # @Log.info("KubernetesApiClient::isLinuxCluster : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + # if !allNodesInfo.nil? && !allNodesInfo.empty? + # allNodesInfo['items'].each do |item| + # if !(item['status']['nodeInfo']['operatingSystem'].casecmp('linux') == 0) + # @@IsLinuxCluster = false + # break + # end + # end + # end + # rescue => error + # @Log.warn("KubernetesApiClient::isLinuxCluster : node role request failed: #{error}") + # end + # return @@IsLinuxCluster + #end + + # returns an arry of pods (json) + def getPods(namespace) + pods = [] + begin + kubesystemResourceUri = "namespaces/" + namespace + "/pods" + podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) + podInfo["items"].each do |items| + pods.push items + end + rescue => error + @Log.warn("List pods request failed: #{error}") + end + return pods + end - def getContainerIDs(namespace) - containers = Hash.new - begin - kubesystemResourceUri = "namespaces/" + namespace + "/pods" - @Log.info("KubernetesApiClient::getContainerIDs : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body) - @Log.info("KubernetesApiClient::getContainerIDs : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInfo['items'].each do |item| - if (!item['status'].nil? && !item['status'].empty? && !item['status']['containerStatuses'].nil? && !item['status']['containerStatuses'].empty?) 
- item['status']['containerStatuses'].each do |cntr| - containers[cntr['containerID']] = "kube-system" - end - end - end - rescue => error - @Log.warn("KubernetesApiClient::getContainerIDs : List ContainerIDs request failed: #{error}") + # returns a hash of windows node names and their internal IPs + def getWindowsNodes + winNodes = [] + begin + nodeInventory = JSON.parse(getKubeResourceInfo("nodes").body) + @Log.info "KubernetesAPIClient::getWindowsNodes : Got nodes from kube api" + # Resetting the windows node cache + @@WinNodeArray.clear + if (!nodeInventory.empty?) + nodeInventory["items"].each do |item| + # check for windows operating system in node metadata + winNode = {} + nodeStatus = item["status"] + nodeMetadata = item["metadata"] + if !nodeStatus.nil? && !nodeStatus["nodeInfo"].nil? && !nodeStatus["nodeInfo"]["operatingSystem"].nil? + operatingSystem = nodeStatus["nodeInfo"]["operatingSystem"] + if (operatingSystem.is_a?(String) && operatingSystem.casecmp("windows") == 0) + # Adding windows nodes to winNodeArray so that it can be used in kubepodinventory to send ContainerInventory data + # to get images and image tags for containers in windows nodes + if !nodeMetadata.nil? && !nodeMetadata["name"].nil? + @@WinNodeArray.push(nodeMetadata["name"]) end - return containers + nodeStatusAddresses = nodeStatus["addresses"] + if !nodeStatusAddresses.nil? 
+ nodeStatusAddresses.each do |address|
+ winNode[address["type"]] = address["address"]
+ end
+ winNodes.push(winNode)
+ end
+ end
end
+ end
+ end
+ return winNodes
+ rescue => error
+ @Log.warn("Error in get windows nodes: #{error}")
+ return nil
+ end
+ end

- def getContainerLogs(namespace, pod, container, showTimeStamp)
- containerLogs = ""
- begin
- kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container
- if showTimeStamp
- kubesystemResourceUri += "&timestamps=true"
- end
- @Log.info("KubernetesApiClient::getContainerLogs : Getting logs from Kube API @ #{Time.now.utc.iso8601}")
- containerLogs = getKubeResourceInfo(kubesystemResourceUri).body
- @Log.info("KubernetesApiClient::getContainerLogs : Done getting logs from Kube API @ #{Time.now.utc.iso8601}")
- rescue => error
- @Log.warn("Pod logs request failed: #{error}")
- end
- return containerLogs
+ def getWindowsNodesArray
+ return @@WinNodeArray
+ end
+
+ def getContainerIDs(namespace)
+ containers = Hash.new
+ begin
+ kubesystemResourceUri = "namespaces/" + namespace + "/pods"
+ @Log.info("KubernetesApiClient::getContainerIDs : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
+ podInfo = JSON.parse(getKubeResourceInfo(kubesystemResourceUri).body)
+ @Log.info("KubernetesApiClient::getContainerIDs : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
+ podInfo["items"].each do |item|
+ if (!item["status"].nil? && !item["status"].empty? && !item["status"]["containerStatuses"].nil? && !item["status"]["containerStatuses"].empty?) 
+ item["status"]["containerStatuses"].each do |cntr|
+ containers[cntr["containerID"]] = "kube-system"
end
+ end
+ end
+ rescue => error
+ @Log.warn("KubernetesApiClient::getContainerIDs : List ContainerIDs request failed: #{error}")
+ end
+ return containers
+ end
+
+ def getContainerLogs(namespace, pod, container, showTimeStamp)
+ containerLogs = ""
+ begin
+ kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container
+ if showTimeStamp
+ kubesystemResourceUri += "&timestamps=true"
+ end
+ @Log.info("KubernetesApiClient::getContainerLogs : Getting logs from Kube API @ #{Time.now.utc.iso8601}")
+ containerLogs = getKubeResourceInfo(kubesystemResourceUri).body
+ @Log.info("KubernetesApiClient::getContainerLogs : Done getting logs from Kube API @ #{Time.now.utc.iso8601}")
+ rescue => error
+ @Log.warn("Pod logs request failed: #{error}")
+ end
+ return containerLogs
+ end
+
+ def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp)
+ containerLogs = ""
+ begin
+ kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container + "&sinceTime=" + since
+ kubesystemResourceUri = URI.escape(kubesystemResourceUri, ":.+") # HTML URL Encoding for date
+
+ if showTimeStamp
+ kubesystemResourceUri += "&timestamps=true"
+ end
+ @Log.info("calling #{kubesystemResourceUri}")
+ @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Getting logs from Kube API @ #{Time.now.utc.iso8601}")
+ containerLogs = getKubeResourceInfo(kubesystemResourceUri).body
+ @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Done getting logs from Kube API @ #{Time.now.utc.iso8601}")
+ rescue => error
+ @Log.warn("Pod logs request failed: #{error}")
+ end
+ return containerLogs
+ end

- def getContainerLogsSinceTime(namespace, pod, container, since, showTimeStamp)
- containerLogs = ""
- begin
- kubesystemResourceUri = "namespaces/" + namespace + "/pods/" + pod + "/log" + "?container=" + container 
+ "&sinceTime=" + since
- kubesystemResourceUri = URI.escape(kubesystemResourceUri, ":.+") # HTML URL Encoding for date
-
- if showTimeStamp
- kubesystemResourceUri += "&timestamps=true"
- end
- @Log.info("calling #{kubesystemResourceUri}")
- @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Getting logs from Kube API @ #{Time.now.utc.iso8601}")
- containerLogs = getKubeResourceInfo(kubesystemResourceUri).body
- @Log.info("KubernetesApiClient::getContainerLogsSinceTime : Done getting logs from Kube API @ #{Time.now.utc.iso8601}")
- rescue => error
- @Log.warn("Pod logs request failed: #{error}")
+ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn)
+ metricItems = []
+ begin
+ clusterId = getClusterId
+ metricInfo = metricJSON
+ metricInfo["items"].each do |pod|
+ podNameSpace = pod["metadata"]["namespace"]
+ if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences")
+ # The above case seems to be the only case where you have horizontal scaling of pods
+ # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash
+ # instead of the actual poduid. Since this uid is not being surface into the UX
+ # its ok to use this.
+ # Use kubernetes.io/config.hash to be able to correlate with cadvisor data
+ podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"]
+ else
+ podUid = pod["metadata"]["uid"]
+ end
+ if (!pod["spec"]["containers"].nil? && !pod["spec"]["nodeName"].nil?)
+ nodeName = pod["spec"]["nodeName"]
+ pod["spec"]["containers"].each do |container|
+ containerName = container["name"]
+ metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+ if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) 
+ metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #No container level limit for the given metric, so default to node level limit + else + nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect + if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) + metricValue = @@NodeMetrics[nodeMetricsHashKey] + #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") + metricItem = {} + metricItem["DataItems"] = [] + + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = nodeName + metricProps["ObjectName"] = "K8SContainer" + metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName + + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) end - return containerLogs + end end + end + end + rescue => error + @Log.warn("getcontainerResourceRequestsAndLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + return metricItems + end + return metricItems + end #getContainerResourceRequestAndLimits - def 
getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) - metricItems = [] - begin - clusterId = getClusterId - metricInfo = metricJSON - metricInfo['items'].each do |pod| - podNameSpace = pod['metadata']['namespace'] - if podNameSpace.eql?("kube-system") && !pod['metadata'].key?("ownerReferences") - # The above case seems to be the only case where you have horizontal scaling of pods - # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash - # instead of the actual poduid. Since this uid is not being surface into the UX - # its ok to use this. - # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = pod['metadata']['annotations']['kubernetes.io/config.hash'] - else - podUid = pod['metadata']['uid'] - end - if (!pod['spec']['containers'].nil? && !pod['spec']['nodeName'].nil?) - nodeName = pod['spec']['nodeName'] - pod['spec']['containers'].each do |container| - containerName = container['name'] - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - if (!container['resources'].nil? && !container['resources'].empty? && !container['resources'][metricCategory].nil? && !container['resources'][metricCategory][metricNameToCollect].nil?) 
- metricValue = getMetricNumericValue(metricNameToCollect, container['resources'][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = nodeName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #No container level limit for the given metric, so default to node level limit - else - nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect - if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey)) - - metricValue = @@NodeMetrics[nodeMetricsHashKey] - #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ") - metricItem = {} - metricItem['DataItems'] = [] - - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = nodeName - metricProps['ObjectName'] = "K8SContainer" - metricProps['InstanceName'] = clusterId + "/" + podUid + "/" + containerName - - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - end - end - end - end - end - rescue => error - @Log.warn("getcontainerResourceRequestsAndLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") - return metricItems - end - return metricItems - end #getContainerResourceRequestAndLimits - - def 
parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) - metricItems = [] - begin - metricInfo = metricJSON - clusterId = getClusterId - #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, - #if we are coming up with the time it should be same for all nodes - metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z - metricInfo['items'].each do |node| - if (!node['status'][metricCategory].nil?) - - # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" - metricValue = getMetricNumericValue(metricNameToCollect, node['status'][metricCategory][metricNameToCollect]) - - metricItem = {} - metricItem['DataItems'] = [] - metricProps = {} - metricProps['Timestamp'] = metricTime - metricProps['Host'] = node['metadata']['name'] - metricProps['ObjectName'] = "K8SNode" - metricProps['InstanceName'] = clusterId + "/" + node['metadata']['name'] - metricProps['Collections'] = [] - metricCollections = {} - metricCollections['CounterName'] = metricNametoReturn - metricCollections['Value'] = metricValue - - metricProps['Collections'].push(metricCollections) - metricItem['DataItems'].push(metricProps) - metricItems.push(metricItem) - #push node level metrics to a inmem hash so that we can use it looking up at container level. 
- #Currently if container level cpu & memory limits are not defined we default to node level limits - @@NodeMetrics[clusterId + "/" + node['metadata']['name'] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue - #@Log.info ("Node metric hash: #{@@NodeMetrics}") - end - end - rescue => error - @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") - end - return metricItems - end #parseNodeLimits - - def getMetricNumericValue(metricName, metricVal) - metricValue = metricVal - begin - case metricName - when "memory" #convert to bytes for memory - #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ - if (metricValue.end_with?("Ki")) - metricValue.chomp!("Ki") - metricValue = Float(metricValue) * 1024.0 ** 1 - elsif (metricValue.end_with?("Mi")) - metricValue.chomp!("Mi") - metricValue = Float(metricValue) * 1024.0 ** 2 - elsif (metricValue.end_with?("Gi")) - metricValue.chomp!("Gi") - metricValue = Float(metricValue) * 1024.0 ** 3 - elsif (metricValue.end_with?("Ti")) - metricValue.chomp!("Ti") - metricValue = Float(metricValue) * 1024.0 ** 4 - elsif (metricValue.end_with?("Pi")) - metricValue.chomp!("Pi") - metricValue = Float(metricValue) * 1024.0 ** 5 - elsif (metricValue.end_with?("Ei")) - metricValue.chomp!("Ei") - metricValue = Float(metricValue) * 1024.0 ** 6 - elsif (metricValue.end_with?("Zi")) - metricValue.chomp!("Zi") - metricValue = Float(metricValue) * 1024.0 ** 7 - elsif (metricValue.end_with?("Yi")) - metricValue.chomp!("Yi") - metricValue = Float(metricValue) * 1024.0 ** 8 - elsif (metricValue.end_with?("K")) - metricValue.chomp!("K") - metricValue = Float(metricValue) * 1000.0 ** 1 - elsif (metricValue.end_with?("M")) - metricValue.chomp!("M") - metricValue = Float(metricValue) * 1000.0 ** 2 - elsif (metricValue.end_with?("G")) - metricValue.chomp!("G") - metricValue = Float(metricValue) * 1000.0 ** 3 - elsif (metricValue.end_with?("T")) - metricValue.chomp!("T") 
- metricValue = Float(metricValue) * 1000.0 ** 4 - elsif (metricValue.end_with?("P")) - metricValue.chomp!("P") - metricValue = Float(metricValue) * 1000.0 ** 5 - elsif (metricValue.end_with?("E")) - metricValue.chomp!("E") - metricValue = Float(metricValue) * 1000.0 ** 6 - elsif (metricValue.end_with?("Z")) - metricValue.chomp!("Z") - metricValue = Float(metricValue) * 1000.0 ** 7 - elsif (metricValue.end_with?("Y")) - metricValue.chomp!("Y") - metricValue = Float(metricValue) * 1000.0 ** 8 - else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') - metricValue = Float(metricValue) - end - when "cpu" #convert to nanocores for cpu - #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ - if (metricValue.end_with?("m")) - metricValue.chomp!("m") - metricValue = Float(metricValue) * 1000.0 ** 2 - else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') - metricValue = Float(metricValue) * 1000.0 ** 3 - end - else - @Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") - metricValue = 0 - end #case statement - rescue => error - @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. Returning 0 formetric value") - return 0 - end - return metricValue - end # getMetricNumericValue + def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn) + metricItems = [] + begin + metricInfo = metricJSON + clusterId = getClusterId + #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics, + #if we are coming up with the time it should be same for all nodes + metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z + metricInfo["items"].each do |node| + if (!node["status"][metricCategory].nil?) 
+ + # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" + metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect]) + + metricItem = {} + metricItem["DataItems"] = [] + metricProps = {} + metricProps["Timestamp"] = metricTime + metricProps["Host"] = node["metadata"]["name"] + metricProps["ObjectName"] = "K8SNode" + metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"] + metricProps["Collections"] = [] + metricCollections = {} + metricCollections["CounterName"] = metricNametoReturn + metricCollections["Value"] = metricValue + + metricProps["Collections"].push(metricCollections) + metricItem["DataItems"].push(metricProps) + metricItems.push(metricItem) + #push node level metrics to a inmem hash so that we can use it looking up at container level. + #Currently if container level cpu & memory limits are not defined we default to node level limits + @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue + #@Log.info ("Node metric hash: #{@@NodeMetrics}") + end end - end + rescue => error + @Log.warn("parseNodeLimits failed: #{error} for metric #{metricCategory} #{metricNameToCollect}") + end + return metricItems + end #parseNodeLimits + def getMetricNumericValue(metricName, metricVal) + metricValue = metricVal + begin + case metricName + when "memory" #convert to bytes for memory + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ + if (metricValue.end_with?("Ki")) + metricValue.chomp!("Ki") + metricValue = Float(metricValue) * 1024.0 ** 1 + elsif (metricValue.end_with?("Mi")) + metricValue.chomp!("Mi") + metricValue = Float(metricValue) * 1024.0 ** 2 + elsif (metricValue.end_with?("Gi")) + metricValue.chomp!("Gi") + metricValue = Float(metricValue) * 1024.0 ** 3 + elsif (metricValue.end_with?("Ti")) + metricValue.chomp!("Ti") + metricValue = 
Float(metricValue) * 1024.0 ** 4 + elsif (metricValue.end_with?("Pi")) + metricValue.chomp!("Pi") + metricValue = Float(metricValue) * 1024.0 ** 5 + elsif (metricValue.end_with?("Ei")) + metricValue.chomp!("Ei") + metricValue = Float(metricValue) * 1024.0 ** 6 + elsif (metricValue.end_with?("Zi")) + metricValue.chomp!("Zi") + metricValue = Float(metricValue) * 1024.0 ** 7 + elsif (metricValue.end_with?("Yi")) + metricValue.chomp!("Yi") + metricValue = Float(metricValue) * 1024.0 ** 8 + elsif (metricValue.end_with?("K")) + metricValue.chomp!("K") + metricValue = Float(metricValue) * 1000.0 ** 1 + elsif (metricValue.end_with?("M")) + metricValue.chomp!("M") + metricValue = Float(metricValue) * 1000.0 ** 2 + elsif (metricValue.end_with?("G")) + metricValue.chomp!("G") + metricValue = Float(metricValue) * 1000.0 ** 3 + elsif (metricValue.end_with?("T")) + metricValue.chomp!("T") + metricValue = Float(metricValue) * 1000.0 ** 4 + elsif (metricValue.end_with?("P")) + metricValue.chomp!("P") + metricValue = Float(metricValue) * 1000.0 ** 5 + elsif (metricValue.end_with?("E")) + metricValue.chomp!("E") + metricValue = Float(metricValue) * 1000.0 ** 6 + elsif (metricValue.end_with?("Z")) + metricValue.chomp!("Z") + metricValue = Float(metricValue) * 1000.0 ** 7 + elsif (metricValue.end_with?("Y")) + metricValue.chomp!("Y") + metricValue = Float(metricValue) * 1000.0 ** 8 + else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) + end + when "cpu" #convert to nanocores for cpu + #https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/ + if (metricValue.end_with?("m")) + metricValue.chomp!("m") + metricValue = Float(metricValue) * 1000.0 ** 2 + else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units') + metricValue = Float(metricValue) * 1000.0 ** 3 + end + else + 
@Log.warn("getMetricNumericValue: Unsupported metric #{metricName}. Returning 0 for metric value") + metricValue = 0 + end #case statement + rescue => error + @Log.warn("getMetricNumericValue failed: #{error} for metric #{metricName} with value #{metricVal}. Returning 0 formetric value") + return 0 + end + return metricValue + end # getMetricNumericValue + end +end diff --git a/source/code/plugin/in_cadvisor_perf.rb b/source/code/plugin/in_cadvisor_perf.rb index a857aa6b9..f5f65f01b 100644 --- a/source/code/plugin/in_cadvisor_perf.rb +++ b/source/code/plugin/in_cadvisor_perf.rb @@ -2,90 +2,88 @@ # frozen_string_literal: true module Fluent - - class CAdvisor_Perf_Input < Input - Plugin.register_input('cadvisorperf', self) - - def initialize - super - require 'yaml' - require 'json' - - require_relative 'CAdvisorMetricsAPIClient' - require_relative 'oms_common' - require_relative 'omslog' - end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.api.cadvisorperf" - config_param :mdmtag, :string, :default => "mdm.cadvisorperf" - - def configure (conf) - super + class CAdvisor_Perf_Input < Input + Plugin.register_input("cadvisorperf", self) + + def initialize + super + require "yaml" + require "json" + + require_relative "CAdvisorMetricsAPIClient" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.cadvisorperf" + config_param :mdmtag, :string, :default => "mdm.cadvisorperf" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) end - - def start - if @run_interval - @finished = false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - end + end + + def shutdown + if @run_interval + @mutex.synchronize { + 
@finished = true + @condition.signal + } + @thread.join end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join + end + + def enumerate() + time = Time.now.to_f + begin + eventStream = MultiEventStream.new + metricData = CAdvisorMetricsAPIClient.getMetrics() + metricData.each do |record| + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + eventStream.add(time, record) if record + #router.emit(@tag, time, record) if record end - end - - def enumerate() - time = Time.now.to_f - begin - eventStream = MultiEventStream.new - metricData = CAdvisorMetricsAPIClient.getMetrics() - metricData.each do |record| - record['DataType'] = "LINUX_PERF_BLOB" - record['IPName'] = "LogManagement" - eventStream.add(time, record) if record - #router.emit(@tag, time, record) if record - end - - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@mdmtag, eventStream) if eventStream - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") - end - rescue => errorStr - $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) + + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@mdmtag, eventStream) if eventStream + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") end + rescue => errorStr + $log.warn "Failed to retrieve cadvisor metric data: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) end - - def run_periodic - @mutex.lock + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" - end + @mutex.unlock + if !done + begin + $log.info("in_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics: #{errorStr}" end - @mutex.lock end - @mutex.unlock + @mutex.lock end - end # CAdvisor_Perf_Input + @mutex.unlock + end + end # CAdvisor_Perf_Input end # module - diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb index f501421a2..4d83278a9 100644 --- a/source/code/plugin/in_containerinventory.rb +++ b/source/code/plugin/in_containerinventory.rb @@ -2,29 +2,28 @@ # frozen_string_literal: true module Fluent - class Container_Inventory_Input < Input - Plugin.register_input('containerinventory', self) + Plugin.register_input("containerinventory", self) - @@PluginName = 'ContainerInventory' - @@RunningState = 'Running' - @@FailedState = 'Failed' - @@StoppedState = 'Stopped' - @@PausedState = 'Paused' + @@PluginName = "ContainerInventory" + @@RunningState = "Running" + @@FailedState = "Failed" + @@StoppedState = "Stopped" + @@PausedState = "Paused" def initialize super - require 'json' - require_relative 'DockerApiClient' - 
require_relative 'ContainerInventoryState' - require_relative 'ApplicationInsightsUtility' - require_relative 'omslog' + require "json" + require_relative "DockerApiClient" + require_relative "ContainerInventoryState" + require_relative "ApplicationInsightsUtility" + require_relative "omslog" end - config_param :run_interval, :time, :default => '1m' + config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.containerinsights.containerinventory" - - def configure (conf) + + def configure(conf) super end @@ -50,16 +49,16 @@ def shutdown def obtainContainerConfig(instance, container) begin - configValue = container['Config'] + configValue = container["Config"] if !configValue.nil? - instance['ContainerHostname'] = configValue['Hostname'] + instance["ContainerHostname"] = configValue["Hostname"] - envValue = configValue['Env'] + envValue = configValue["Env"] envValueString = (envValue.nil?) ? "" : envValue.to_s # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) envValueString = ["AZMON_COLLECT_ENV=FALSE"] - $log.warn("Environment Variable collection for container: #{container['Id']} skipped because AZMON_COLLECT_ENV is set to false") + $log.warn("Environment Variable collection for container: #{container["Id"]} skipped because AZMON_COLLECT_ENV is set to false") end # Restricting the ENV string value to 200kb since the size of this string can go very high if envValueString.length > 200000 @@ -68,88 +67,88 @@ def obtainContainerConfig(instance, container) if !lastIndex.nil? 
envValueStringTruncated = envValueStringTruncated.slice(0..lastIndex) + "]" end - instance['EnvironmentVar'] = envValueStringTruncated + instance["EnvironmentVar"] = envValueStringTruncated else - instance['EnvironmentVar'] = envValueString + instance["EnvironmentVar"] = envValueString end - cmdValue = configValue['Cmd'] + cmdValue = configValue["Cmd"] cmdValueString = (cmdValue.nil?) ? "" : cmdValue.to_s - instance['Command'] = cmdValueString + instance["Command"] = cmdValueString - instance['ComposeGroup'] = "" - labelsValue = configValue['Labels'] + instance["ComposeGroup"] = "" + labelsValue = configValue["Labels"] if !labelsValue.nil? && !labelsValue.empty? - instance['ComposeGroup'] = labelsValue['com.docker.compose.project'] + instance["ComposeGroup"] = labelsValue["com.docker.compose.project"] end else - $log.warn("Attempt in ObtainContainerConfig to get container: #{container['Id']} config information returned null") - end - rescue => errorStr - $log.warn("Exception in obtainContainerConfig: #{errorStr}") + $log.warn("Attempt in ObtainContainerConfig to get container: #{container["Id"]} config information returned null") end + rescue => errorStr + $log.warn("Exception in obtainContainerConfig: #{errorStr}") + end end def obtainContainerState(instance, container) begin - stateValue = container['State'] + stateValue = container["State"] if !stateValue.nil? 
- exitCodeValue = stateValue['ExitCode'] + exitCodeValue = stateValue["ExitCode"] # Exit codes less than 0 are not supported by the engine if exitCodeValue < 0 - exitCodeValue = 128 - $log.info("obtainContainerState::Container: #{container['Id']} returned negative exit code") + exitCodeValue = 128 + $log.info("obtainContainerState::Container: #{container["Id"]} returned negative exit code") end - instance['ExitCode'] = exitCodeValue + instance["ExitCode"] = exitCodeValue if exitCodeValue > 0 - instance['State'] = @@FailedState + instance["State"] = @@FailedState else # Set the Container status : Running/Paused/Stopped - runningValue = stateValue['Running'] + runningValue = stateValue["Running"] if runningValue - pausedValue = stateValue['Paused'] + pausedValue = stateValue["Paused"] # Checking for paused within running is true state because docker returns true for both Running and Paused fields when the container is paused if pausedValue - instance['State'] = @@PausedState + instance["State"] = @@PausedState else - instance['State'] = @@RunningState + instance["State"] = @@RunningState end else - instance['State'] = @@StoppedState + instance["State"] = @@StoppedState end end - instance['StartedTime'] = stateValue['StartedAt'] - instance['FinishedTime'] = stateValue['FinishedAt'] + instance["StartedTime"] = stateValue["StartedAt"] + instance["FinishedTime"] = stateValue["FinishedAt"] else - $log.info("Attempt in ObtainContainerState to get container: #{container['Id']} state information returned null") + $log.info("Attempt in ObtainContainerState to get container: #{container["Id"]} state information returned null") end - rescue => errorStr - $log.warn("Exception in obtainContainerState: #{errorStr}") + rescue => errorStr + $log.warn("Exception in obtainContainerState: #{errorStr}") end end def obtainContainerHostConfig(instance, container) begin - hostConfig = container['HostConfig'] + hostConfig = container["HostConfig"] if !hostConfig.nil? 
- links = hostConfig['Links'] - instance['Links'] = "" + links = hostConfig["Links"] + instance["Links"] = "" if !links.nil? linksString = links.to_s - instance['Links'] = (linksString == "null")? "" : linksString + instance["Links"] = (linksString == "null") ? "" : linksString end - portBindings = hostConfig['PortBindings'] - instance['Ports'] = "" + portBindings = hostConfig["PortBindings"] + instance["Ports"] = "" if !portBindings.nil? portBindingsString = portBindings.to_s - instance['Ports'] = (portBindingsString == "null")? "" : portBindingsString + instance["Ports"] = (portBindingsString == "null") ? "" : portBindingsString end else - $log.info("Attempt in ObtainContainerHostConfig to get container: #{container['Id']} host config information returned null") - end - rescue => errorStr - $log.warn("Exception in obtainContainerHostConfig: #{errorStr}") + $log.info("Attempt in ObtainContainerHostConfig to get container: #{container["Id"]} host config information returned null") end + rescue => errorStr + $log.warn("Exception in obtainContainerHostConfig: #{errorStr}") + end end def inspectContainer(id, nameMap) @@ -157,29 +156,29 @@ def inspectContainer(id, nameMap) begin container = DockerApiClient.dockerInspectContainer(id) if !container.nil? && !container.empty? - containerInstance['InstanceID'] = container['Id'] - containerInstance['CreatedTime'] = container['Created'] - containerName = container['Name'] + containerInstance["InstanceID"] = container["Id"] + containerInstance["CreatedTime"] = container["Created"] + containerName = container["Name"] if !containerName.nil? && !containerName.empty? # Remove the leading / from the name if it exists (this is an API issue) - containerInstance['ElementName'] = (containerName[0] == '/') ? containerName[1..-1] : containerName + containerInstance["ElementName"] = (containerName[0] == "/") ? containerName[1..-1] : containerName end - imageValue = container['Image'] + imageValue = container["Image"] if !imageValue.nil? 
&& !imageValue.empty? - containerInstance['ImageId'] = imageValue + containerInstance["ImageId"] = imageValue repoImageTagArray = nameMap[imageValue] if nameMap.has_key? imageValue - containerInstance['Repository'] = repoImageTagArray[0] - containerInstance['Image'] = repoImageTagArray[1] - containerInstance['ImageTag'] = repoImageTagArray[2] + containerInstance["Repository"] = repoImageTagArray[0] + containerInstance["Image"] = repoImageTagArray[1] + containerInstance["ImageTag"] = repoImageTagArray[2] end end - obtainContainerConfig(containerInstance, container); - obtainContainerState(containerInstance, container); - obtainContainerHostConfig(containerInstance, container); + obtainContainerConfig(containerInstance, container) + obtainContainerState(containerInstance, container) + obtainContainerHostConfig(containerInstance, container) end rescue => errorStr - $log.warn("Exception in inspectContainer: #{errorStr} for container: #{id}") + $log.warn("Exception in inspectContainer: #{errorStr} for container: #{id}") end return containerInstance end @@ -199,8 +198,8 @@ def enumerate containerIds.each do |containerId| inspectedContainer = {} inspectedContainer = inspectContainer(containerId, nameMap) - inspectedContainer['Computer'] = hostname - inspectedContainer['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated + inspectedContainer["Computer"] = hostname + inspectedContainer["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated containerInventory.push inspectedContainer ContainerInventoryState.writeContainerState(inspectedContainer) end @@ -210,8 +209,8 @@ def enumerate deletedContainers.each do |deletedContainer| container = ContainerInventoryState.readContainerState(deletedContainer) if !container.nil? 
- container.each{|k,v| container[k]=v} - container['State'] = "Deleted" + container.each { |k, v| container[k] = v } + container["State"] = "Deleted" containerInventory.push container end end @@ -219,28 +218,28 @@ def enumerate containerInventory.each do |record| wrapper = { - "DataType"=>"CONTAINER_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] + "DataType" => "CONTAINER_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper end router.emit_stream(@tag, eventStream) if eventStream - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("containerInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - @@telemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProperties = {} - telemetryProperties['Computer'] = hostname - telemetryProperties['ContainerCount'] = containerInventory.length - ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) - end $log.info("in_container_inventory::enumerate : Processing complete - emitted stream @ #{Time.now.utc.iso8601}") end + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProperties = {} + telemetryProperties["Computer"] = hostname + telemetryProperties["ContainerCount"] = containerInventory.length + ApplicationInsightsUtility.sendTelemetry(@@PluginName, telemetryProperties) + end rescue => 
errorStr $log.warn("Exception in enumerate container inventory: #{errorStr}") end @@ -265,7 +264,5 @@ def run_periodic end @mutex.unlock end - end # Container_Inventory_Input - -end # module \ No newline at end of file +end # module diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index ba1dacbe0..aabda441e 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -2,181 +2,176 @@ # frozen_string_literal: true module Fluent + class Kube_nodeInventory_Input < Input + Plugin.register_input("kubenodeinventory", self) - class Kube_nodeInventory_Input < Input - Plugin.register_input('kubenodeinventory', self) - - @@ContainerNodeInventoryTag = 'oms.containerinsights.ContainerNodeInventory' - @@MDMKubeNodeInventoryTag = 'mdm.kubenodeinventory' + @@ContainerNodeInventoryTag = "oms.containerinsights.ContainerNodeInventory" + @@MDMKubeNodeInventoryTag = "mdm.kubenodeinventory" - def initialize - super - require 'yaml' - require 'json' - - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' - require_relative 'oms_common' - require_relative 'omslog' + def initialize + super + require "yaml" + require "json" + require_relative "KubernetesApiClient" + require_relative "ApplicationInsightsUtility" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i end - - config_param :run_interval, :time, :default => '1m' - config_param :tag, :string, :default => "oms.containerinsights.KubeNodeInventory" - - def configure (conf) - super - end - - def start - if @run_interval - @finished 
= false - @condition = ConditionVariable.new - @mutex = Mutex.new - @thread = Thread.new(&method(:run_periodic)) - @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i - end - end - - def shutdown - if @run_interval - @mutex.synchronize { - @finished = true - @condition.signal - } - @thread.join - end - end - - def enumerate - currentTime = Time.now - emitTime = currentTime.to_f - batchTime = currentTime.utc.iso8601 - telemetrySent = false - $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('nodes').body) - $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") - begin - if(!nodeInventory.empty?) - eventStream = MultiEventStream.new - containerNodeInventoryEventStream = MultiEventStream.new - #get node inventory - nodeInventory['items'].each do |items| - record = {} - # Sending records for ContainerNodeInventory - containerNodeInventoryRecord = {} - containerNodeInventoryRecord['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - containerNodeInventoryRecord['Computer'] = items['metadata']['name'] + end - record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - record['Computer'] = items['metadata']['name'] - record['ClusterName'] = KubernetesApiClient.getClusterName - record['ClusterId'] = KubernetesApiClient.getClusterId - record['CreationTimeStamp'] = items['metadata']['creationTimestamp'] - record['Labels'] = [items['metadata']['labels']] - record['Status'] = "" + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. - # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . 
Based on this we - # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" - # implying that the node is ready for hosting pods, however its out of disk. - - if items['status'].key?("conditions") && !items['status']['conditions'].empty? - allNodeConditions="" - items['status']['conditions'].each do |condition| - if condition['status'] == "True" - if !allNodeConditions.empty? - allNodeConditions = allNodeConditions + "," + condition['type'] - else - allNodeConditions = condition['type'] - end - end - #collect last transition to/from ready (no matter ready is true/false) - if condition['type'] == "Ready" && !condition['lastTransitionTime'].nil? - record['LastTransitionTimeReady'] = condition['lastTransitionTime'] - end - end - if !allNodeConditions.empty? - record['Status'] = allNodeConditions - end + def enumerate + currentTime = Time.now + emitTime = currentTime.to_f + batchTime = currentTime.utc.iso8601 + telemetrySent = false + $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") + nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + begin + if (!nodeInventory.empty?) 
+ eventStream = MultiEventStream.new + containerNodeInventoryEventStream = MultiEventStream.new + #get node inventory + nodeInventory["items"].each do |items| + record = {} + # Sending records for ContainerNodeInventory + containerNodeInventoryRecord = {} + containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerNodeInventoryRecord["Computer"] = items["metadata"]["name"] - end + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Computer"] = items["metadata"]["name"] + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ClusterId"] = KubernetesApiClient.getClusterId + record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"] + record["Labels"] = [items["metadata"]["labels"]] + record["Status"] = "" - nodeInfo = items['status']['nodeInfo'] - record['KubeletVersion'] = nodeInfo['kubeletVersion'] - record['KubeProxyVersion'] = nodeInfo['kubeProxyVersion'] - containerNodeInventoryRecord['OperatingSystem'] = nodeInfo['osImage'] - dockerVersion = nodeInfo['containerRuntimeVersion'] - dockerVersion.slice! "docker://" - containerNodeInventoryRecord['DockerVersion'] = dockerVersion - # ContainerNodeInventory data for docker version and operating system. - containerNodeInventoryWrapper = { - "DataType"=>"CONTAINER_NODE_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[containerNodeInventoryRecord.each{|k,v| containerNodeInventoryRecord[k]=v}] - } - containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. + # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we + # populate the KubeNodeInventory Status field. 
A possible value for this field could be "Ready OutofDisk" + # implying that the node is ready for hosting pods, however its out of disk. - wrapper = { - "DataType"=>"KUBE_NODE_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] - } - eventStream.add(emitTime, wrapper) if wrapper - # Adding telemetry to send node telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - properties = {} - properties["Computer"] = record["Computer"] - properties["KubeletVersion"] = record["KubeletVersion"] - capacityInfo = items['status']['capacity'] - ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"] , properties) - ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"] , properties) - telemetrySent = true - end - end - router.emit_stream(@tag, eventStream) if eventStream - router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream - router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream - if telemetrySent == true - @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + if items["status"].key?("conditions") && !items["status"]["conditions"].empty? + allNodeConditions = "" + items["status"]["conditions"].each do |condition| + if condition["status"] == "True" + if !allNodeConditions.empty? + allNodeConditions = allNodeConditions + "," + condition["type"] + else + allNodeConditions = condition["type"] + end end - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp('true') == 0 && eventStream.count > 0) - $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") + #collect last transition to/from ready (no matter ready is true/false) + if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil? + record["LastTransitionTimeReady"] = condition["lastTransitionTime"] end - end - rescue => errorStr - $log.warn "Failed to retrieve node inventory: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end - end - - def run_periodic - @mutex.lock - done = @finished - until done - @condition.wait(@mutex, @run_interval) - done = @finished - @mutex.unlock - if !done - begin - $log.info("in_kube_nodes::run_periodic @ #{Time.now.utc.iso8601}") - enumerate - rescue => errorStr - $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + if !allNodeConditions.empty? + record["Status"] = allNodeConditions + end end + + nodeInfo = items["status"]["nodeInfo"] + record["KubeletVersion"] = nodeInfo["kubeletVersion"] + record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"] + containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"] + dockerVersion = nodeInfo["containerRuntimeVersion"] + dockerVersion.slice! "docker://" + containerNodeInventoryRecord["DockerVersion"] = dockerVersion + # ContainerNodeInventory data for docker version and operating system. 
+ containerNodeInventoryWrapper = { + "DataType" => "CONTAINER_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }], + } + containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper + + wrapper = { + "DataType" => "KUBE_NODE_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], + } + eventStream.add(emitTime, wrapper) if wrapper + # Adding telemetry to send node telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + properties = {} + properties["Computer"] = record["Computer"] + properties["KubeletVersion"] = record["KubeletVersion"] + properties["OperatingSystem"] = nodeInfo["operatingSystem"] + properties["DockerVersion"] = dockerVersion + capacityInfo = items["status"]["capacity"] + ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties) + ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties) + telemetrySent = true + end + end + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream + router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream + if telemetrySent == true + @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i + end + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - @mutex.lock end + rescue => errorStr + $log.warn "Failed to retrieve node inventory: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished @mutex.unlock + if !done + begin + $log.info("in_kube_nodes::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_kube_nodes::run_periodic: enumerate Failed to retrieve node inventory: #{errorStr}" + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + @mutex.lock end - - end # Kube_Node_Input - - end # module - - \ No newline at end of file + @mutex.unlock + end + end # Kube_Node_Input +end # module diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 3d026b05f..65573673c 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -2,29 +2,28 @@ # frozen_string_literal: true module Fluent - class Kube_PodInventory_Input < Input - Plugin.register_input('kubepodinventory', self) + Plugin.register_input("kubepodinventory", self) - @@MDMKubePodInventoryTag = 'mdm.kubepodinventory' + @@MDMKubePodInventoryTag = "mdm.kubepodinventory" + @@hostName = (OMS::Common.get_hostname) def initialize super - require 'yaml' - require 'json' - require 'set' - - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' - require_relative 'oms_common' - require_relative 'omslog' + require "yaml" + require "json" + require "set" + require_relative "KubernetesApiClient" + require_relative "ApplicationInsightsUtility" + require_relative "oms_common" + require_relative "omslog" end - config_param :run_interval, :time, :default 
=> '1m' + config_param :run_interval, :time, :default => "1m" config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory" - def configure (conf) + def configure(conf) super end @@ -48,29 +47,126 @@ def shutdown end end - def enumerate(podList = nil) - if podList.nil? - $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo('pods').body) - $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + def enumerate(podList = nil) + if podList.nil? + $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) + $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + else + podInventory = podList + end + begin + if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) 
+ #get pod inventory & services + $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") + serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") + parse_and_emit_records(podInventory, serviceList) else - podInventory = podList + $log.warn "Received empty podInventory" + end + rescue => errorStr + $log.warn "Failed in enumerate pod inventory: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def populateWindowsContainerInventoryRecord(container, record, containerEnvVariableHash, batchTime) + begin + containerInventoryRecord = {} + containerName = container["name"] + containerInventoryRecord["InstanceID"] = record["ContainerID"] + containerInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + containerInventoryRecord["Computer"] = record["Computer"] + containerInventoryRecord["ContainerHostname"] = record["Computer"] + containerInventoryRecord["ElementName"] = containerName + image = container["image"] + repoInfo = image.split("/") + if !repoInfo.nil? + containerInventoryRecord["Repository"] = repoInfo[0] + if !repoInfo[1].nil? + imageInfo = repoInfo[1].split(":") + if !imageInfo.nil? + containerInventoryRecord["Image"] = imageInfo[0] + containerInventoryRecord["ImageTag"] = imageInfo[1] + end + end + end + imageIdInfo = container["imageID"] + imageIdSplitInfo = imageIdInfo.split("@") + if !imageIdSplitInfo.nil? 
+ containerInventoryRecord["ImageId"] = imageIdSplitInfo[1] + end + # Get container state + containerStatus = container["state"] + if containerStatus.keys[0] == "running" + containerInventoryRecord["State"] = "Running" + containerInventoryRecord["StartedTime"] = container["state"]["running"]["startedAt"] + elsif containerStatus.keys[0] == "terminated" + containerExitCode = container["state"]["terminated"]["exitCode"] + containerStartTime = container["state"]["terminated"]["startedAt"] + containerFinishTime = container["state"]["terminated"]["finishedAt"] + if containerExitCode < 0 + # Exit codes less than 0 are not supported by the engine + containerExitCode = 128 + end + if containerExitCode > 0 + containerInventoryRecord["State"] = "Failed" + else + containerInventoryRecord["State"] = "Stopped" + end + containerInventoryRecord["ExitCode"] = containerExitCode + containerInventoryRecord["StartedTime"] = containerStartTime + containerInventoryRecord["FinishedTime"] = containerFinishTime + elsif containerStatus.keys[0] == "waiting" + containerInventoryRecord["State"] = "Waiting" + end + if !containerEnvVariableHash.nil? && !containerEnvVariableHash.empty? + containerInventoryRecord["EnvironmentVar"] = containerEnvVariableHash[containerName] end - begin - if(!podInventory.empty? && podInventory.key?("items") && !podInventory['items'].empty?) 
- #get pod inventory & services - $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo('services').body) - $log.info("in_kube_podinventory::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") - parse_and_emit_records(podInventory, serviceList) - else - $log.warn "Received empty podInventory" - end - rescue => errorStr - $log.warn "Failed in enumerate pod inventory: #{errorStr}" - $log.debug_backtrace(errorStr.backtrace) - ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end + return containerInventoryRecord + rescue => errorStr + $log.warn "Failed in populateWindowsContainerInventoryRecord: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end + end + + def getContainerEnvironmentVariables(pod) + begin + podSpec = pod["spec"] + containerEnvHash = {} + if !podSpec.nil? && !podSpec["containers"].nil? + podSpec["containers"].each do |container| + envVarsArray = [] + containerEnvArray = container["env"] + # Parsing the environment variable array of hashes to a string value + # since that is format being sent by container inventory workflow in daemonset + # Keeping it in the same format because the workflow expects it in this format + # and the UX expects an array of string for environment variables + if !containerEnvArray.nil? && !containerEnvArray.empty? 
+ containerEnvArray.each do |envVarHash| + envName = envVarHash["name"] + envValue = envVarHash["value"] + envArrayElement = envName + "=" + envValue + envVarsArray.push(envArrayElement) + end + end + # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE + envValueString = envVarsArray.to_s + if /AZMON_COLLECT_ENV=FALSE/i.match(envValueString) + envValueString = ["AZMON_COLLECT_ENV=FALSE"] + end + containerEnvHash[container["name"]] = envValueString + end + end + return containerEnvHash + rescue => errorStr + $log.warn "Failed in getContainerEnvironmentVariables: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) + end end def parse_and_emit_records(podInventory, serviceList) @@ -80,100 +176,116 @@ def parse_and_emit_records(podInventory, serviceList) eventStream = MultiEventStream.new controllerSet = Set.new [] telemetryFlush = false + winContainerCount = 0 begin #begin block start - podInventory['items'].each do |items| #podInventory block start + # Getting windows nodes from kubeapi + winNodes = KubernetesApiClient.getWindowsNodesArray + + podInventory["items"].each do |items| #podInventory block start + sendWindowsContainerInventoryRecord = false + containerInventoryRecords = [] records = [] record = {} - record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated - record['Name'] = items['metadata']['name'] - podNameSpace = items['metadata']['namespace'] - - if podNameSpace.eql?("kube-system") && !items['metadata'].key?("ownerReferences") + record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated + record["Name"] = items["metadata"]["name"] + podNameSpace = items["metadata"]["namespace"] + + if podNameSpace.eql?("kube-system") && !items["metadata"].key?("ownerReferences") # The above case seems to be the only case where you have horizontal scaling of pods # but no controller, in which case 
cAdvisor picks up kubernetes.io/config.hash # instead of the actual poduid. Since this uid is not being surface into the UX # its ok to use this. # Use kubernetes.io/config.hash to be able to correlate with cadvisor data - podUid = items['metadata']['annotations']['kubernetes.io/config.hash'] + podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"] else - podUid = items['metadata']['uid'] + podUid = items["metadata"]["uid"] end - record['PodUid'] = podUid - record['PodLabel'] = [items['metadata']['labels']] - record['Namespace'] = podNameSpace - record['PodCreationTimeStamp'] = items['metadata']['creationTimestamp'] + record["PodUid"] = podUid + record["PodLabel"] = [items["metadata"]["labels"]] + record["Namespace"] = podNameSpace + record["PodCreationTimeStamp"] = items["metadata"]["creationTimestamp"] #for unscheduled (non-started) pods startTime does NOT exist - if !items['status']['startTime'].nil? - record['PodStartTime'] = items['status']['startTime'] + if !items["status"]["startTime"].nil? + record["PodStartTime"] = items["status"]["startTime"] else - record['PodStartTime'] = "" + record["PodStartTime"] = "" end #podStatus # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running podReadyCondition = true - if !items['status']['reason'].nil? && items['status']['reason'] == "NodeLost" && !items['status']['conditions'].nil? - items['status']['conditions'].each do |condition| - if condition['type'] == "Ready" && condition['status'] == "False" + if !items["status"]["reason"].nil? && items["status"]["reason"] == "NodeLost" && !items["status"]["conditions"].nil? 
+ items["status"]["conditions"].each do |condition| + if condition["type"] == "Ready" && condition["status"] == "False" podReadyCondition = false break end end end if podReadyCondition == false - record['PodStatus'] = "Unknown" + record["PodStatus"] = "Unknown" else - record['PodStatus'] = items['status']['phase'] + record["PodStatus"] = items["status"]["phase"] end #for unscheduled (non-started) pods podIP does NOT exist - if !items['status']['podIP'].nil? - record['PodIp'] =items['status']['podIP'] + if !items["status"]["podIP"].nil? + record["PodIp"] = items["status"]["podIP"] else - record['PodIp'] = "" + record["PodIp"] = "" end #for unscheduled (non-started) pods nodeName does NOT exist - if !items['spec']['nodeName'].nil? - record['Computer'] = items['spec']['nodeName'] + if !items["spec"]["nodeName"].nil? + record["Computer"] = items["spec"]["nodeName"] else - record['Computer'] = "" - end - record['ClusterId'] = KubernetesApiClient.getClusterId - record['ClusterName'] = KubernetesApiClient.getClusterName - record['ServiceName'] = getServiceNameFromLabels(items['metadata']['namespace'], items['metadata']['labels'], serviceList) - # Adding telemetry to send pod telemetry every 5 minutes - timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference/60 - if (timeDifferenceInMinutes >= 5) - telemetryFlush = true - end - if !items['metadata']['ownerReferences'].nil? - record['ControllerKind'] = items['metadata']['ownerReferences'][0]['kind'] - record['ControllerName'] = items['metadata']['ownerReferences'][0]['name'] + record["Computer"] = "" + end + + # Setting this flag to true so that we can send ContainerInventory records for containers + # on windows nodes and parse environment variables for these containers + if winNodes.length > 0 + if (!record["Computer"].empty? && (winNodes.include? 
record["Computer"])) + sendWindowsContainerInventoryRecord = true + containerEnvVariableHash = getContainerEnvironmentVariables(items) + end + end + + record["ClusterId"] = KubernetesApiClient.getClusterId + record["ClusterName"] = KubernetesApiClient.getClusterName + record["ServiceName"] = getServiceNameFromLabels(items["metadata"]["namespace"], items["metadata"]["labels"], serviceList) + # Adding telemetry to send pod telemetry every 5 minutes + timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= 5) + telemetryFlush = true + end + if !items["metadata"]["ownerReferences"].nil? + record["ControllerKind"] = items["metadata"]["ownerReferences"][0]["kind"] + record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"] if telemetryFlush == true - controllerSet.add(record['ControllerKind'] + record['ControllerName']) + controllerSet.add(record["ControllerKind"] + record["ControllerName"]) end end podRestartCount = 0 - record['PodRestartCount'] = 0 - if items['status'].key?("containerStatuses") && !items['status']['containerStatuses'].empty? #container status block start - items['status']['containerStatuses'].each do |container| - containerRestartCount = 0 - #container Id is of the form - #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 - if !container['containerID'].nil? - record['ContainerID'] = container['containerID'].split("//")[1] - else + record["PodRestartCount"] = 0 + if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start + items["status"]["containerStatuses"].each do |container| + containerRestartCount = 0 + #container Id is of the form + #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 + if !container["containerID"].nil? 
+ record["ContainerID"] = container["containerID"].split("//")[1] + else # for containers that have image issues (like invalid image/tag etc..) this will be empty. do not make it all 0 - record['ContainerID'] = "" + record["ContainerID"] = "" end - #keeping this as which is same as InstanceName in perf table - record['ContainerName'] = podUid + "/" +container['name'] - #Pod restart count is a sumtotal of restart counts of individual containers - #within the pod. The restart count of a container is maintained by kubernetes - #itself in the form of a container label. - containerRestartCount = container['restartCount'] - record['ContainerRestartCount'] = containerRestartCount - containerStatus = container['state'] - record['ContainerStatusReason'] = '' + #keeping this as which is same as InstanceName in perf table + record["ContainerName"] = podUid + "/" + container["name"] + #Pod restart count is a sumtotal of restart counts of individual containers + #within the pod. The restart count of a container is maintained by kubernetes + #itself in the form of a container label. 
+ containerRestartCount = container["restartCount"] + record["ContainerRestartCount"] = containerRestartCount + containerStatus = container["state"] + record["ContainerStatusReason"] = "" # state is of the following form , so just picking up the first key name # "state": { # "waiting": { @@ -183,55 +295,80 @@ def parse_and_emit_records(podInventory, serviceList) # }, # the below is for accounting 'NodeLost' scenario, where-in the containers in the lost node/pod(s) is still being reported as running if podReadyCondition == false - record['ContainerStatus'] = "Unknown" + record["ContainerStatus"] = "Unknown" else - record['ContainerStatus'] = containerStatus.keys[0] + record["ContainerStatus"] = containerStatus.keys[0] end #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric #Picking up both container and node start time from cAdvisor to be consistent if containerStatus.keys[0] == "running" - record['ContainerCreationTimeStamp'] = container['state']['running']['startedAt'] + record["ContainerCreationTimeStamp"] = container["state"]["running"]["startedAt"] else - if !containerStatus[containerStatus.keys[0]]['reason'].nil? && !containerStatus[containerStatus.keys[0]]['reason'].empty? - record['ContainerStatusReason'] = containerStatus[containerStatus.keys[0]]['reason'] + if !containerStatus[containerStatus.keys[0]]["reason"].nil? && !containerStatus[containerStatus.keys[0]]["reason"].empty? 
+ record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"] end end - podRestartCount += containerRestartCount - records.push(record.dup) - end + podRestartCount += containerRestartCount + records.push(record.dup) + + #Generate ContainerInventory records for windows nodes so that we can get image and image tag in property panel + if sendWindowsContainerInventoryRecord == true + containerInventoryRecord = populateWindowsContainerInventoryRecord(container, record, containerEnvVariableHash, batchTime) + containerInventoryRecords.push(containerInventoryRecord) + end + end else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod - records.push(record) + records.push(record) end #container status block end records.each do |record| if !record.nil? - record['PodRestartCount'] = podRestartCount + record["PodRestartCount"] = podRestartCount wrapper = { - "DataType"=>"KUBE_POD_INVENTORY_BLOB", - "IPName"=>"ContainerInsights", - "DataItems"=>[record.each{|k,v| record[k]=v}] + "DataType" => "KUBE_POD_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [record.each { |k, v| record[k] = v }], } eventStream.add(emitTime, wrapper) if wrapper - end - end + end + end + # Send container inventory records for containers on windows nodes + winContainerCount += containerInventoryRecords.length + containerInventoryRecords.each do |cirecord| + if !cirecord.nil? 
+ ciwrapper = { + "DataType" => "CONTAINER_INVENTORY_BLOB", + "IPName" => "ContainerInsights", + "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }], + } + eventStream.add(emitTime, ciwrapper) if ciwrapper + end + end end #podInventory block end + router.emit_stream(@tag, eventStream) if eventStream router.emit_stream(@@MDMKubePodInventoryTag, eventStream) if eventStream if telemetryFlush == true - ApplicationInsightsUtility.sendHeartBeatEvent("KubePodInventory") - ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory['items'].length , {}) - ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length , {}) + telemetryProperties = {} + telemetryProperties["Computer"] = @@hostName + ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties) + ApplicationInsightsUtility.sendMetricTelemetry("PodCount", podInventory["items"].length, {}) + ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", controllerSet.length, {}) + if winContainerCount > 0 + telemetryProperties["ClusterWideWindowsContainersCount"] = winContainerCount + ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties) + end @@podTelemetryTimeTracker = DateTime.now.to_time.to_i end - @@istestvar = ENV['ISTEST'] - if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp('true') == 0 && eventStream.count > 0) + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}") end - rescue => errorStr + rescue => errorStr $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) - end #begin block end - end + end #begin block end + end def run_periodic @mutex.lock @@ -257,37 +394,33 @@ def run_periodic def getServiceNameFromLabels(namespace, labels, serviceList) serviceName = "" begin - if !labels.nil? && !labels.empty? - if( !serviceList.nil? && !serviceList.empty? && serviceList.key?("items") && !serviceList['items'].empty?) - serviceList['items'].each do |item| + if !labels.nil? && !labels.empty? + if (!serviceList.nil? && !serviceList.empty? && serviceList.key?("items") && !serviceList["items"].empty?) + serviceList["items"].each do |item| found = 0 - if !item['spec'].nil? && !item['spec']['selector'].nil? && item['metadata']['namespace'] == namespace - selectorLabels = item['spec']['selector'] + if !item["spec"].nil? && !item["spec"]["selector"].nil? && item["metadata"]["namespace"] == namespace + selectorLabels = item["spec"]["selector"] if !selectorLabels.empty? 
- selectorLabels.each do |key,value| - if !(labels.select {|k,v| k==key && v==value}.length > 0) + selectorLabels.each do |key, value| + if !(labels.select { |k, v| k == key && v == value }.length > 0) break end found = found + 1 end - end + end if found == selectorLabels.length - return item['metadata']['name'] + return item["metadata"]["name"] end - end + end end - end + end end - rescue => errorStr + rescue => errorStr $log.warn "Failed to retrieve service name from labels: #{errorStr}" $log.debug_backtrace(errorStr.backtrace) ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) end return serviceName end - end # Kube_Pod_Input - end # module - - diff --git a/source/code/plugin/in_win_cadvisor_perf.rb b/source/code/plugin/in_win_cadvisor_perf.rb new file mode 100644 index 000000000..2e5f839e6 --- /dev/null +++ b/source/code/plugin/in_win_cadvisor_perf.rb @@ -0,0 +1,120 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +module Fluent + class Win_CAdvisor_Perf_Input < Input + Plugin.register_input("wincadvisorperf", self) + + @@winNodes = [] + + def initialize + super + require "yaml" + require "json" + + require_relative "CAdvisorMetricsAPIClient" + require_relative "KubernetesApiClient" + require_relative "oms_common" + require_relative "omslog" + end + + config_param :run_interval, :time, :default => "1m" + config_param :tag, :string, :default => "oms.api.wincadvisorperf" + config_param :mdmtag, :string, :default => "mdm.cadvisorperf" + + def configure(conf) + super + end + + def start + if @run_interval + @finished = false + @condition = ConditionVariable.new + @mutex = Mutex.new + @thread = Thread.new(&method(:run_periodic)) + @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i + @@cleanupRoutineTimeTracker = DateTime.now.to_time.to_i + end + end + + def shutdown + if @run_interval + @mutex.synchronize { + @finished = true + @condition.signal + } + @thread.join + end + end + + def enumerate() + time = Time.now.to_f + begin + eventStream = 
MultiEventStream.new + timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + + #Resetting this cache so that it is populated with the current set of containers with every call + CAdvisorMetricsAPIClient.resetWinContainerIdCache() + if (timeDifferenceInMinutes >= 5) + $log.info "in_win_cadvisor_perf: Getting windows nodes" + nodes = KubernetesApiClient.getWindowsNodes() + if !nodes.nil? + @@winNodes = KubernetesApiClient.getWindowsNodes() + end + $log.info "in_win_cadvisor_perf : Successuly got windows nodes after 5 minute interval" + @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i + end + @@winNodes.each do |winNode| + metricData = CAdvisorMetricsAPIClient.getMetrics(winNode) + metricData.each do |record| + if !record.empty? + record["DataType"] = "LINUX_PERF_BLOB" + record["IPName"] = "LogManagement" + eventStream.add(time, record) if record + end + end + router.emit_stream(@tag, eventStream) if eventStream + router.emit_stream(@mdmtag, eventStream) if eventStream + + @@istestvar = ENV["ISTEST"] + if (!@@istestvar.nil? && !@@istestvar.empty? 
&& @@istestvar.casecmp("true") == 0 && eventStream.count > 0) + $log.info("winCAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}") + end + end + + # Cleanup routine to clear deleted containers from cache + cleanupTimeDifference = (DateTime.now.to_time.to_i - @@cleanupRoutineTimeTracker).abs + cleanupTimeDifferenceInMinutes = cleanupTimeDifference / 60 + if (cleanupTimeDifferenceInMinutes >= 5) + $log.info "in_win_cadvisor_perf : Cleanup routine kicking in to clear deleted containers from cache" + CAdvisorMetricsAPIClient.clearDeletedWinContainersFromCache() + @@cleanupRoutineTimeTracker = DateTime.now.to_time.to_i + end + rescue => errorStr + $log.warn "Failed to retrieve cadvisor metric data for windows nodes: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + end + end + + def run_periodic + @mutex.lock + done = @finished + until done + @condition.wait(@mutex, @run_interval) + done = @finished + @mutex.unlock + if !done + begin + $log.info("in_win_cadvisor_perf::run_periodic @ #{Time.now.utc.iso8601}") + enumerate + rescue => errorStr + $log.warn "in_win_cadvisor_perf::run_periodic: enumerate Failed to retrieve cadvisor perf metrics for windows nodes: #{errorStr}" + end + end + @mutex.lock + end + @mutex.unlock + end + end # Win_CAdvisor_Perf_Input +end # module diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 93b32ef50..963069858 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -2,29 +2,27 @@ # frozen_string_literal: true module Fluent - class OutputMDM < BufferedOutput - config_param :retry_mdm_post_wait_minutes, :integer - Plugin.register_output('out_mdm', self) + Plugin.register_output("out_mdm", self) def initialize super - require 'net/http' - require 'net/https' - require 'uri' - require 'json' - require_relative 'KubernetesApiClient' - require_relative 'ApplicationInsightsUtility' + require "net/http" + require "net/https" + require "uri" + require "json" + require_relative 
"KubernetesApiClient" + require_relative "ApplicationInsightsUtility" - @@token_resource_url = 'https://monitoring.azure.com/' - @@grant_type = 'client_credentials' - @@azure_json_path = '/etc/kubernetes/host/azure.json' + @@token_resource_url = "https://monitoring.azure.com/" + @@grant_type = "client_credentials" + @@azure_json_path = "/etc/kubernetes/host/azure.json" @@post_request_url_template = "https://%{aks_region}.monitoring.azure.com%{aks_resource_id}/metrics" @@token_url_template = "https://login.microsoftonline.com/%{tenant_id}/oauth2/token" @@plugin_name = "AKSCustomMetricsMDM" - + @data_hash = {} @token_url = nil @http_client = nil @@ -50,12 +48,13 @@ def start @can_send_data_to_mdm = false return end - # Handle the case where the file read fails. Send Telemetry and exit the plugin? + # Handle the case where the file read fails. Send Telemetry and exit the plugin? @data_hash = JSON.parse(file) - @token_url = @@token_url_template % {tenant_id: @data_hash['tenantId']} + @token_url = @@token_url_template % {tenant_id: @data_hash["tenantId"]} @cached_access_token = get_access_token - aks_resource_id = ENV['AKS_RESOURCE_ID'] - aks_region = ENV['AKS_REGION'] + aks_resource_id = ENV["AKS_RESOURCE_ID"] + aks_region = ENV["AKS_REGION"] + if aks_resource_id.to_s.empty? @log.info "Environment Variable AKS_RESOURCE_ID is not set.. " @can_send_data_to_mdm = false @@ -77,7 +76,7 @@ def start # get the access token only if the time to expiry is less than 5 minutes def get_access_token - if @cached_access_token.to_s.empty? || (Time.now + 5*60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration + if @cached_access_token.to_s.empty? || (Time.now + 5 * 60 > @token_expiry_time) # token is valid for 60 minutes. Refresh token 5 minutes from expiration @log.info "Refreshing access token for out_mdm plugin.." 
token_uri = URI.parse(@token_url) http_access_token = Net::HTTP.new(token_uri.host, token_uri.port) @@ -85,27 +84,27 @@ def get_access_token token_request = Net::HTTP::Post.new(token_uri.request_uri) token_request.set_form_data( { - 'grant_type' => @@grant_type, - 'client_id' => @data_hash['aadClientId'], - 'client_secret' => @data_hash['aadClientSecret'], - 'resource' => @@token_resource_url - } + "grant_type" => @@grant_type, + "client_id" => @data_hash["aadClientId"], + "client_secret" => @data_hash["aadClientSecret"], + "resource" => @@token_resource_url, + } ) - + token_response = http_access_token.request(token_request) - # Handle the case where the response is not 200 + # Handle the case where the response is not 200 parsed_json = JSON.parse(token_response.body) - @token_expiry_time = Time.now + 59*60 # set the expiry time to be ~one hour from current time - @cached_access_token = parsed_json['access_token'] + @token_expiry_time = Time.now + 59 * 60 # set the expiry time to be ~one hour from current time + @cached_access_token = parsed_json["access_token"] end @cached_access_token - end + end def write_status_file(success, message) - fn = '/var/opt/microsoft/omsagent/log/MDMIngestion.status' + fn = "/var/opt/microsoft/omsagent/log/MDMIngestion.status" status = '{ "operation": "MDMIngestion", "success": "%s", "message": "%s" }' % [success, message] begin - File.open(fn,'w') { |file| file.write(status) } + File.open(fn, "w") { |file| file.write(status) } rescue => e @log.debug "Error:'#{e}'" ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) @@ -123,13 +122,13 @@ def format(tag, time, record) end end - # This method is called every flush interval. Send the buffer chunk to MDM. + # This method is called every flush interval. Send the buffer chunk to MDM. 
# 'chunk' is a buffer chunk that includes multiple formatted records def write(chunk) begin - if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes*60)) && @can_send_data_to_mdm + if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes * 60)) && @can_send_data_to_mdm post_body = [] - chunk.msgpack_each {|(tag, record)| + chunk.msgpack_each { |(tag, record)| post_body.push(record.to_json) } send_to_mdm post_body @@ -137,7 +136,7 @@ def write(chunk) if !@can_send_data_to_mdm @log.info "Cannot send data to MDM since all required conditions were not met" else - @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time)/60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP" + @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time) / 60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP" end end rescue Exception => e @@ -146,12 +145,12 @@ def write(chunk) end end - def send_to_mdm(post_body) + def send_to_mdm(post_body) begin access_token = get_access_token request = Net::HTTP::Post.new(@post_request_uri.request_uri) - request['Content-Type'] = "application/x-ndjson" - request['Authorization'] = "Bearer #{access_token}" + request["Content-Type"] = "application/x-ndjson" + request["Authorization"] = "Bearer #{access_token}" request.body = post_body.join("\n") response = @http_client.request(request) response.value # this throws for non 200 HTTP response code @@ -166,10 +165,10 @@ def send_to_mdm(post_body) @first_post_attempt_made = true ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) # Not raising exception, as that will cause retries to happen - elsif !response.code.empty? && response.code.start_with?('4') + elsif !response.code.empty? 
&& response.code.start_with?("4") # Log 400 errors and continue @log.info "Non-retryable HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" - else + else # raise if the response code is non-400 @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}" raise e @@ -186,7 +185,8 @@ def send_to_mdm(post_body) raise e end end - private + + private class ChunkErrorHandler include Configurable @@ -218,20 +218,20 @@ def router=(r) end def write(chunk) - chunk.msgpack_each {|(tag, record)| + chunk.msgpack_each { |(tag, record)| @error_handlers[tag].emit(record) } end - - private + + private def create_error_handlers(router) nop_handler = NopErrorHandler.new Hash.new() { |hash, tag| etag = OMS::Common.create_error_tag tag hash[tag] = router.match?(etag) ? - ErrorHandler.new(router, etag) : - nop_handler + ErrorHandler.new(router, etag) : + nop_handler } end @@ -251,10 +251,6 @@ def emit(record) # NOP end end - end - end # class OutputMDM - end # module Fluent - From ebdd8cc119a77752fd543225878f36e055812d14 Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Mon, 8 Apr 2019 11:55:52 -0700 Subject: [PATCH 84/88] adding os to container inventory for windows nodes (#210) --- source/code/plugin/CAdvisorMetricsAPIClient.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index 8b4fd9fcf..35cf727cf 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -318,6 +318,7 @@ def getContainerCpuMetricItemRate(metricJSON, hostName, cpuMetricNameToCollect, telemetryProperties = {} telemetryProperties["Computer"] = hostName telemetryProperties["ContainerCount"] = containerCount + telemetryProperties["OS"] = "Windows" # Hardcoding the event to ContainerInventory hearbeat event since the telemetry is pivoted off of this event. 
@Log.info "sending container inventory heartbeat telemetry" ApplicationInsightsUtility.sendCustomEvent("ContainerInventoryHeartBeatEvent", telemetryProperties) From d7b8cff1d9b20f3894fdd91c0e1cd3b69a465ed9 Mon Sep 17 00:00:00 2001 From: Dilip Raghunathan Date: Mon, 8 Apr 2019 15:40:31 -0700 Subject: [PATCH 85/88] Fix omsagent crash Error when kube-api returns non-200, send events for HTTP Errors (#211) * Fix omsagent crash Error when kube-api returns non-200, send events for HTTP Errors * Fixing the bug, deferring telemetry changes for later --- source/code/plugin/filter_cadvisor2mdm.rb | 102 +++++++++++----------- source/code/plugin/out_mdm.rb | 2 +- 2 files changed, 54 insertions(+), 50 deletions(-) diff --git a/source/code/plugin/filter_cadvisor2mdm.rb b/source/code/plugin/filter_cadvisor2mdm.rb index 94f2107cc..a6e643e45 100644 --- a/source/code/plugin/filter_cadvisor2mdm.rb +++ b/source/code/plugin/filter_cadvisor2mdm.rb @@ -10,45 +10,45 @@ module Fluent class CAdvisor2MdmFilter < Filter Fluent::Plugin.register_filter('filter_cadvisor2mdm', self) - + config_param :enable_log, :integer, :default => 0 config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log' config_param :custom_metrics_azure_regions, :string config_param :metrics_to_collect, :string, :default => 'cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes' - + @@cpu_usage_milli_cores = 'cpuUsageMillicores' @@cpu_usage_nano_cores = 'cpuusagenanocores' @@object_name_k8s_node = 'K8SNode' @@hostName = (OMS::Common.get_hostname) @@custom_metrics_template = ' - { - "time": "%{timestamp}", - "data": { - "baseData": { - "metric": "%{metricName}", - "namespace": "Insights.Container/nodes", - "dimNames": [ + { + "time": "%{timestamp}", + "data": { + "baseData": { + "metric": "%{metricName}", + "namespace": "Insights.Container/nodes", + "dimNames": [ "host" - ], - "series": [ - { - "dimValues": [ + ], + "series": [ + { + "dimValues": [ "%{hostvalue}" - ], + ], 
"min": %{metricminvalue}, - "max": %{metricmaxvalue}, - "sum": %{metricsumvalue}, - "count": 1 - } - ] - } - } + "max": %{metricmaxvalue}, + "sum": %{metricsumvalue}, + "count": 1 + } + ] + } + } }' - + @@metric_name_metric_percentage_name_hash = { - @@cpu_usage_milli_cores => "cpuUsagePercentage", + @@cpu_usage_milli_cores => "cpuUsagePercentage", "memoryRssBytes" => "memoryRssPercentage", - "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" + "memoryWorkingSetBytes" => "memoryWorkingSetPercentage" } @process_incoming_stream = true @@ -61,7 +61,7 @@ def initialize def configure(conf) super @log = nil - + if @enable_log @log = Logger.new(@log_path, 1, 5000000) @log.debug {'Starting filter_cadvisor2mdm plugin'} @@ -70,15 +70,19 @@ def configure(conf) def start super - @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) - @metrics_to_collect_hash = build_metrics_hash - @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" - - # initialize cpu and memory limit - if @process_incoming_stream - @cpu_capacity = 0.0 - @memory_capacity = 0.0 - ensure_cpu_memory_capacity_set + begin + @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions) + @metrics_to_collect_hash = build_metrics_hash + @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}" + + # initialize cpu and memory limit + if @process_incoming_stream + @cpu_capacity = 0.0 + @memory_capacity = 0.0 + ensure_cpu_memory_capacity_set + end + rescue => e + @log.info "Error initializing plugin #{e}" end end @@ -117,9 +121,9 @@ def filter(tag, time, record) if @memory_capacity != 0.0 percentage_metric_value = metric_value*100/@memory_capacity end - end + end return get_metric_records(record, metric_name, metric_value, percentage_metric_value) - else + else return [] end else @@ -140,13 +144,13 @@ def 
ensure_cpu_memory_capacity_set return end - begin + begin nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes?fieldSelector=metadata.name%3D#{@@hostName}").body) rescue Exception => e @log.info "Error when getting nodeInventory from kube API. Exception: #{e.class} Message: #{e.message} " ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) end - if !nodeInventory.nil? + if !nodeInventory.nil? cpu_capacity_json = KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores") if !cpu_capacity_json.nil? && !cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'].to_s.nil? @cpu_capacity = cpu_capacity_json[0]['DataItems'][0]['Collections'][0]['Value'] @@ -163,7 +167,7 @@ def ensure_cpu_memory_capacity_set end end end - + def get_metric_records(record, metric_name, metric_value, percentage_metric_value) records = [] custommetricrecord = @@custom_metrics_template % { @@ -194,20 +198,20 @@ def get_metric_records(record, metric_name, metric_value, percentage_metric_valu return records end - + def filter_stream(tag, es) new_es = MultiEventStream.new - ensure_cpu_memory_capacity_set - es.each { |time, record| - begin + begin + ensure_cpu_memory_capacity_set + es.each { |time, record| filtered_records = filter(tag, time, record) - filtered_records.each {|filtered_record| + filtered_records.each {|filtered_record| new_es.add(time, filtered_record) if filtered_record - } if filtered_records - rescue => e - router.emit_error_event(tag, time, record, e) - end - } + } if filtered_records + } + rescue => e + @log.info "Error in filter_stream #{e.message}" + end new_es end end diff --git a/source/code/plugin/out_mdm.rb b/source/code/plugin/out_mdm.rb index 963069858..351198afe 100644 --- a/source/code/plugin/out_mdm.rb +++ b/source/code/plugin/out_mdm.rb @@ -140,6 +140,7 @@ def write(chunk) end end rescue Exception => e + ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) @log.info "Exception when 
writing to MDM: #{e}" raise e end @@ -163,7 +164,6 @@ def send_to_mdm(post_body) @log.info "Response Code #{response.code} Updating @last_post_attempt_time" @last_post_attempt_time = Time.now @first_post_attempt_made = true - ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace) # Not raising exception, as that will cause retries to happen elsif !response.code.empty? && response.code.start_with?("4") # Log 400 errors and continue From c9bb623c2c0aa6642e0baab3b0ebcf313c4627eb Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 10 Apr 2019 16:28:47 -0700 Subject: [PATCH 86/88] updating to lowercase compare for units (#212) --- source/code/plugin/KubernetesApiClient.rb | 66 +++++++++++------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 4ed85025f..3c6b4f203 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -439,58 +439,58 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet end #parseNodeLimits def getMetricNumericValue(metricName, metricVal) - metricValue = metricVal + metricValue = metricVal.downcase begin case metricName when "memory" #convert to bytes for memory #https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource/ - if (metricValue.end_with?("Ki")) - metricValue.chomp!("Ki") + if (metricValue.end_with?("ki")) + metricValue.chomp!("ki") metricValue = Float(metricValue) * 1024.0 ** 1 - elsif (metricValue.end_with?("Mi")) - metricValue.chomp!("Mi") + elsif (metricValue.end_with?("mi")) + metricValue.chomp!("mi") metricValue = Float(metricValue) * 1024.0 ** 2 - elsif (metricValue.end_with?("Gi")) - metricValue.chomp!("Gi") + elsif (metricValue.end_with?("gi")) + metricValue.chomp!("gi") metricValue = Float(metricValue) * 1024.0 ** 3 - elsif (metricValue.end_with?("Ti")) - metricValue.chomp!("Ti") + elsif 
(metricValue.end_with?("ti")) + metricValue.chomp!("ti") metricValue = Float(metricValue) * 1024.0 ** 4 - elsif (metricValue.end_with?("Pi")) - metricValue.chomp!("Pi") + elsif (metricValue.end_with?("pi")) + metricValue.chomp!("pi") metricValue = Float(metricValue) * 1024.0 ** 5 - elsif (metricValue.end_with?("Ei")) - metricValue.chomp!("Ei") + elsif (metricValue.end_with?("ei")) + metricValue.chomp!("ei") metricValue = Float(metricValue) * 1024.0 ** 6 - elsif (metricValue.end_with?("Zi")) - metricValue.chomp!("Zi") + elsif (metricValue.end_with?("zi")) + metricValue.chomp!("zi") metricValue = Float(metricValue) * 1024.0 ** 7 - elsif (metricValue.end_with?("Yi")) - metricValue.chomp!("Yi") + elsif (metricValue.end_with?("yi")) + metricValue.chomp!("yi") metricValue = Float(metricValue) * 1024.0 ** 8 - elsif (metricValue.end_with?("K")) - metricValue.chomp!("K") + elsif (metricValue.end_with?("k")) + metricValue.chomp!("k") metricValue = Float(metricValue) * 1000.0 ** 1 - elsif (metricValue.end_with?("M")) - metricValue.chomp!("M") + elsif (metricValue.end_with?("m")) + metricValue.chomp!("m") metricValue = Float(metricValue) * 1000.0 ** 2 - elsif (metricValue.end_with?("G")) - metricValue.chomp!("G") + elsif (metricValue.end_with?("g")) + metricValue.chomp!("g") metricValue = Float(metricValue) * 1000.0 ** 3 - elsif (metricValue.end_with?("T")) - metricValue.chomp!("T") + elsif (metricValue.end_with?("t")) + metricValue.chomp!("t") metricValue = Float(metricValue) * 1000.0 ** 4 - elsif (metricValue.end_with?("P")) - metricValue.chomp!("P") + elsif (metricValue.end_with?("p")) + metricValue.chomp!("p") metricValue = Float(metricValue) * 1000.0 ** 5 - elsif (metricValue.end_with?("E")) - metricValue.chomp!("E") + elsif (metricValue.end_with?("e")) + metricValue.chomp!("e") metricValue = Float(metricValue) * 1000.0 ** 6 - elsif (metricValue.end_with?("Z")) - metricValue.chomp!("Z") + elsif (metricValue.end_with?("z")) + metricValue.chomp!("z") metricValue = 
Float(metricValue) * 1000.0 ** 7 - elsif (metricValue.end_with?("Y")) - metricValue.chomp!("Y") + elsif (metricValue.end_with?("y")) + metricValue.chomp!("y") metricValue = Float(metricValue) * 1000.0 ** 8 else #assuming there are no units specified, it is bytes (the below conversion will fail for other unsupported 'units') metricValue = Float(metricValue) From 3a88db8e5b1005564e54625959972e176835f9d4 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Tue, 16 Apr 2019 13:00:00 -0700 Subject: [PATCH 87/88] Merge from vishwa/telegraftcp to ci_feature for telegraf changes (#214) * merge from Vishwa/telegraf to Vishwa/telegraftcp for telegraf changes (#207) * add configuration for telegraf * fix for perms * fix telegraf config. * fix file location & config * update to config * fix namespace * trying different namespace and also debug=true * add placeholder for nodename * change namespace * updated config * fix uri * fix azMon settings * remove aad settings * add custom metrics regions * fix config * add support for replica-set config * fix oomkilled * Add telegraf 403 metric telemetry & non 403 trace telemetry * fix type * fix package * fix package import * fix filename * delete unused file * conf file for rs; fix 403counttotal metric for telegraf, remove host and use nodeName consistently, rename metrics * fix statefulsets * fix typo. * fix another typo. * fix telemetry * fix casing issue * fix comma issue. 
* disable telemetry for rs ; fix stateful set name * worksround for namespace fix * telegraf integration - v1 * telemetry changes for telegraf * telemetry & other changes * remove custom metric regions as we dont need anymore * remove un-needed files * fixes * exclude certain volumes and fix telemetry to not have computer & nodename as dimensions (redundant) * Vishwa/resourcecentric (#208) (#209) * resourceid fix (for AKS only) * fix name * near final metric shape * change from customlog to fixed type (InsightsMetrics) * fix PR feedback * fix pr feedback --- installer/conf/td-agent-bit.conf | 27 +- installer/conf/telegraf.conf | 519 ++++++++++++++++++ installer/datafiles/base_container.data | 3 + .../scripts/TelegrafTCPErrorTelemetry.sh | 3 + source/code/go/src/plugins/oms.go | 241 +++++++- source/code/go/src/plugins/out_oms.go | 22 +- source/code/go/src/plugins/telemetry.go | 26 +- 7 files changed, 821 insertions(+), 20 deletions(-) create mode 100644 installer/conf/telegraf.conf create mode 100644 installer/scripts/TelegrafTCPErrorTelemetry.sh diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 78a7b2dde..88bacaca2 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -23,10 +23,33 @@ Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m + +[INPUT] + Name tail + Tag oms.container.log.telegraf.err.* + Path /var/opt/microsoft/docker-cimprov/log/telegraf.log + DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db + Mem_Buf_Limit 2m + Path_Key filepath + Skip_Long_Lines On + Ignore_Older 5m + +[INPUT] + Name tcp + Tag oms.container.perf.telegraf.* + Listen 0.0.0.0 + Port 25226 + Chunk_Size 32 + Buffer_Size 64 + +[FILTER] + Name grep + Match oms.container.log.telegraf.err.* + #Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ [OUTPUT] Name oms EnableTelemetry true TelemetryPushIntervalSeconds 300 - Match oms.container.log.* - 
AgentVersion ciprod03122019 \ No newline at end of file + Match oms.container.* diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf new file mode 100644 index 000000000..355c88b3d --- /dev/null +++ b/installer/conf/telegraf.conf @@ -0,0 +1,519 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + +# Global tags can be specified here in key="value" format. +[global_tags] + #Below are entirely used for telemetry + AgentVersion = "$AGENT_VERSION" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" + ClusterName = "$TELEMETRY_CLUSTER_NAME" + ClusterType = "$TELEMETRY_CLUSTER_TYPE" + Computer = "placeholder_hostname" + ControllerType = "$CONTROLLER_TYPE" + + hostName = "placeholder_hostname" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "60s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. 
Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "60s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = true + ## Specify the log file name. The empty string means to log to stderr. + logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" + + ## Override default hostname, if empty use os.Hostname() + #hostname = "placeholder_hostname" + ## If set to true, do no set the "host" tag in the telegraf agent. 
+ omit_hostname = true + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Generic socket writer capable of handling multiple socket types. +[[outputs.socket_writer]] + ## URL to connect to + address = "tcp://0.0.0.0:25226" + # address = "tcp://example.com:http" + # address = "tcp4://127.0.0.1:8094" + # address = "tcp6://127.0.0.1:8094" + # address = "tcp6://[2001:db8::1]:8094" + # address = "udp://127.0.0.1:8094" + # address = "udp4://127.0.0.1:8094" + # address = "udp6://127.0.0.1:8094" + # address = "unix:///tmp/telegraf.sock" + # address = "unixgram:///tmp/telegraf.sock" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + ## Period between keep alive probes. + ## Only applies to TCP sockets. + ## 0 disables keep alive probes. + ## Defaults to the OS configuration. + # keep_alive_period = "5m" + + ## Data format to generate. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "json" + namedrop = ["telegraf_telemetry"] + tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] + +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + # enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. 
+ ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["telegraf_telemetry"] + #tagdrop = ["nodeName"] + +############################################################################### +# PROCESSOR PLUGINS # +############################################################################### + +# # Perform string processing on tags, fields, and measurements +#[[processors.rename]] + #[[processors.rename.replace]] + # measurement = "disk" + # dest = "nodes" +# [[processors.rename.replace]] +# field = "free" +# dest = "freeBytes" +# [[processors.rename.replace]] +# field = "used" +# dest = "usedBytes" +# [[processors.rename.replace]] +# field = "used_percent" +# dest = "usedPercentage" + #[[processors.rename.replace]] + # measurement = "net" + # dest = "nodes" + #[[processors.rename.replace]] + # field = "bytes_recv" + # dest = "networkBytesReceivedTotal" + #[[processors.rename.replace]] + # field = "bytes_sent" + # dest = "networkBytesSentTotal" + #[[processors.rename.replace]] + # field = "err_in" + # dest = "networkErrorsInTotal" + #[[processors.rename.replace]] + # field = "err_out" + # dest = "networkErrorsOutTotal" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_volume" + # dest = "pods" + #[[processors.rename.replace]] + # field = "used_bytes" + # dest = "podVolumeUsedBytes" + #[[processors.rename.replace]] + # field = "available_bytes" + # dest = "podVolumeAvailableBytes" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_network" + # dest = "pods" + #[[processors.rename.replace]] + # field = "tx_errors" + # dest = "podNetworkTxErrorsTotal" + #[[processors.rename.replace]] + # field = "rx_errors" + # dest = "podNetworkRxErrorsTotal" + 
#[[processors.rename.replace]] + # tag = "volume_name" + # dest = "volumeName" + #[[processors.rename.replace]] + # tag = "pod_name" + # dest = "podName" + #[[processors.rename.replace]] + # measurement = "docker" + # dest = "containers" + #[[processors.rename.replace]] + # measurement = "docker_container_status" + # dest = "containers" + #[[processors.rename.replace]] + # field = "n_containers" + # dest = "numContainers" + #[[processors.rename.replace]] + # field = "n_containers_running" + # dest = "numContainersRunning" + #[[processors.rename.replace]] + # field = "n_containers_stopped" + # dest = "numContainersStopped" + #[[processors.rename.replace]] + # field = "n_containers_paused" + # dest = "numContainersPaused" + #[[processors.rename.replace]] + # field = "n_images" + # dest = "numContainerImages" + +# ## Convert a tag value to uppercase +# # [[processors.strings.uppercase]] +# # tag = "method" +# +# ## Convert a field value to lowercase and store in a new field +# # [[processors.strings.lowercase]] +# # field = "uri_stem" +# # dest = "uri_stem_normalised" +# +# ## Trim leading and trailing whitespace using the default cutset +# # [[processors.strings.trim]] +# # field = "message" +# +# ## Trim leading characters in cutset +# # [[processors.strings.trim_left]] +# # field = "message" +# # cutset = "\t" +# +# ## Trim trailing characters in cutset +# # [[processors.strings.trim_right]] +# # field = "message" +# # cutset = "\r\n" +# +# ## Trim the given prefix from the field +# # [[processors.strings.trim_prefix]] +# # field = "my_value" +# # prefix = "my_" +# +# ## Trim the given suffix from the field +# # [[processors.strings.trim_suffix]] +# # field = "read_count" +# # suffix = "_count" + + +# # Print all metrics that pass through this filter. +# [[processors.topk]] +# ## How many seconds between aggregations +# # period = 10 +# +# ## How many top metrics to return +# # k = 10 +# +# ## Over which tags should the aggregation be done. 
Globs can be specified, in +# ## which case any tag matching the glob will aggregated over. If set to an +# ## empty list is no aggregation over tags is done +# # group_by = ['*'] +# +# ## Over which fields are the top k are calculated +# # fields = ["value"] +# +# ## What aggregation to use. Options: sum, mean, min, max +# # aggregation = "mean" +# +# ## Instead of the top k largest metrics, return the bottom k lowest metrics +# # bottomk = false +# +# ## The plugin assigns each metric a GroupBy tag generated from its name and +# ## tags. If this setting is different than "" the plugin will add a +# ## tag (which name will be the value of this setting) to each metric with +# ## the value of the calculated GroupBy tag. Useful for debugging +# # add_groupby_tag = "" +# +# ## These settings provide a way to know the position of each metric in +# ## the top k. The 'add_rank_field' setting allows to specify for which +# ## fields the position is required. If the list is non empty, then a field +# ## will be added to each and every metric for each string present in this +# ## setting. This field will contain the ranking of the group that +# ## the metric belonged to when aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_rank' +# # add_rank_fields = [] +# +# ## These settings provide a way to know what values the plugin is generating +# ## when aggregating metrics. The 'add_agregate_field' setting allows to +# ## specify for which fields the final aggregation value is required. If the +# ## list is non empty, then a field will be added to each every metric for +# ## each field present in this setting. This field will contain +# ## the computed aggregation for the group that the metric belonged to when +# ## aggregated over that field. 
+# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_aggregate' +# # add_aggregate_fields = [] + + + +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### + +# # Keep the aggregate basicstats of each metric passing through. +# [[aggregators.basicstats]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false + + +# # Create aggregate histograms. +# [[aggregators.histogram]] +# ## The period in which to flush the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## Example config that aggregates all fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] +# # ## The name of metric. +# # measurement_name = "cpu" +# +# ## Example config that aggregates only specific fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] +# # ## The name of metric. +# # measurement_name = "diskio" +# # ## The concrete fields of metric +# # fields = ["io_time", "read_time", "write_time"] + + +# # Keep the aggregate min/max of each metric passing through. +# [[aggregators.minmax]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. 
+# drop_original = false + + +# # Count the occurance of values in fields. +# [[aggregators.valuecounter]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# ## The fields for which the values will be counted +# fields = [] + + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about cpu usage +#[[inputs.cpu]] + ## Whether to report per-cpu stats or not +# percpu = false + ## Whether to report total system cpu stats or not +# totalcpu = true + ## If true, collect raw CPU time metrics. +# collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states. +# report_active = true +# fieldpass = ["usage_active","cluster","node","host","device"] +# taginclude = ["cluster","cpu","node"] + + + +# Read metrics about disk usage by mount point +[[inputs.disk]] + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. + # mount_points = ["/"] + + ## Ignore mount points by filesystem type. + ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] + fieldpass = ["free", "used", "used_percent"] + taginclude = ["device","path","hostName"] + # Below due to Bug - https://github.com/influxdata/telegraf/issues/5615 + # ORDER matters here!! 
- i.e the below should be the LAST modifier + [inputs.disk.tagdrop] + path = ["/var/lib/kubelet*", "/dev/termination-log", "/var/log", "/etc/hosts", "/etc/resolv.conf", "/etc/hostname", "/etc/kubernetes/host", "/var/lib/docker/containers"] + + +# Read metrics about memory usage +#[[inputs.mem]] +# fieldpass = ["used_percent", "cluster", "node","host","device"] +# taginclude = ["cluster","node"] + + +# Read metrics about network interface usage +#[[inputs.net]] + ## By default, telegraf gathers stats from any up interface (excluding loopback) + ## Setting interfaces will tell it to gather these explicit interfaces, + ## regardless of status. + ## + # interfaces = ["eth0"] + ## + ## On linux systems telegraf also collects protocol stats. + ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. + ## +# ignore_protocol_stats = true + ## + #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] + #fieldpass = ["err_in", "err_out"] + #taginclude = ["interface","nodeName"] + +# Read metrics from the kubernetes kubelet api +#[[inputs.kubernetes]] + ## URL for the kubelet + #url = "http://1.1.1.1:10255" +# url = "http://placeholder_nodeip:10255" + + ## Use bearer token for authorization + # bearer_token = /path/to/bearer/token + + ## Set response_timeout (default 5 seconds) + # response_timeout = "5s" + + ## Optional TLS Config + # tls_ca = /path/to/cafile + # tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false +# fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] +# taginclude = ["volume_name","nodeName","namespace","pod_name"] +# Read metrics about docker containers +#[[inputs.docker]] + ## Docker Endpoint + ## To use TCP, set endpoint = "tcp://[ip]:[port]" + ## To use environment variables (ie, docker-machine), set endpoint = "ENV" +# endpoint = "unix:///var/run/host/docker.sock" + + ## Set to true to collect Swarm 
metrics(desired_replicas, running_replicas) +# gather_services = false + + ## Only collect metrics for these containers, collect all if empty +# container_names = [] + + ## Containers to include and exclude. Globs accepted. + ## Note that an empty array for both will include all containers +# container_name_include = [] +# container_name_exclude = [] + + ## Container states to include and exclude. Globs accepted. + ## When empty only containers in the "running" state will be captured. +# container_state_include = ['*'] + # container_state_exclude = [] + + ## Timeout for docker list, info, and stats commands +# timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) stats or not +# perdevice = true + ## Whether to report for each container total blkio and network stats or not +# total = true + ## Which environment variables should we use as a tag + ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] + + ## docker labels to include and exclude as tags. Globs accepted. + ## Note that an empty array for both will include all labels as tags +# docker_label_include = [] +# docker_label_exclude = [] + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false +# fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] + #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] +# taginclude = ["nodeName"] +[[inputs.exec]] + ## Commands array + interval = "15m" + commands = [ + "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" + ] + + ## Timeout for each command to complete. + timeout = "15s" + + ## measurement name suffix (for separating different commands) + name_suffix = "_telemetry" + + ## Data format to consume. 
+ ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + tagexclude = ["hostName"] + diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 9c4d563f8..996c7501a 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -98,6 +98,8 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root +/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -137,6 +139,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit; 755; root; root;sysdir /opt/td-agent-bit/bin; 755; root; root;sysdir +/etc/telegraf; 755; root; root;sysdir /opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir /opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir diff --git a/installer/scripts/TelegrafTCPErrorTelemetry.sh b/installer/scripts/TelegrafTCPErrorTelemetry.sh new file mode 100644 index 000000000..637af3969 --- /dev/null +++ b/installer/scripts/TelegrafTCPErrorTelemetry.sh @@ -0,0 +1,3 @@ +#!/bin/sh +countErr=$(grep -iF "socket_writer" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') +echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file diff --git 
a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index a1ca3d6ee..269d16111 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -23,10 +23,31 @@ import ( ) // DataType for Container Log -const DataType = "CONTAINER_LOG_BLOB" +const ContainerLogDataType = "CONTAINER_LOG_BLOB" + +// DataType for Insights metric +const InsightsMetricsDataType = "INSIGHTS_METRICS_BLOB" + +//env varibale which has ResourceId for LA +const ResourceIdEnv = "AKS_RESOURCE_ID" + +//env variable which has ResourceName for NON-AKS +const ResourceNameEnv = "ACS_RESOURCE_NAME" + +// Origin prefix for telegraf Metrics (used as prefix for origin field & prefix for azure monitor specific tags) +const TelegrafMetricOriginPrefix = "container.azm.ms" +// Origin suffix for telegraf Metrics (used as suffix for origin field) +const TelegrafMetricOriginSuffix = "telegraf" +// Namespace prefix for telegraf Metrics (used as prefix for Namespace field) +//const TelegrafMetricNamespacePrefix = "plugin" +// clusterName tag +const TelegrafTagClusterName = "clusterName" +// clusterId tag +const TelegrafTagClusterID = "clusterId" // ContainerLogPluginConfFilePath --> config file path for container log plugin -const ContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" +const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" +const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" // IPName for Container Log const IPName = "Containers" @@ -44,10 +65,12 @@ var ( Computer string // WorkspaceID log analytics workspace id WorkspaceID string - // ResourceID for resource-centric log analytics data + // ResourceID for resource-centric log analytics data ResourceID string // Resource-centric flag (will be true if we determine if above RseourceID is non-empty - default is false) ResourceCentric bool + //ResourceName + ResourceName string ) var ( @@ 
-92,6 +115,26 @@ type DataItem struct { Computer string `json:"Computer"` } +// telegraf metric DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin +type laTelegrafMetric struct { + // 'golden' fields + Origin string `json:"Origin"` + Namespace string `json:"Namespace"` + Name string `json:"Name"` + Value float64 `json:"Value"` + Tags string `json:"Tags"` + // specific required fields for LA + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` +} + +// ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point +type InsightsMetricsBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []laTelegrafMetric `json:"DataItems"` +} + // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point type ContainerLogBlob struct { DataType string `json:"DataType"` @@ -207,6 +250,174 @@ func updateKubeSystemContainerIDs() { } } +//Azure loganalytics metric values have to be numeric, so string values are dropped +func convert(in interface{}) (float64, bool) { + switch v := in.(type) { + case int64: + return float64(v), true + case uint64: + return float64(v), true + case float64: + return v, true + case bool: + if v { + return float64(1), true + } + return float64(0), true + default: + Log ("returning 0 for %v ", in) + return float64(0), false + } +} + +//Translates telegraf time series to one or more Azure loganalytics metric(s) +func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { + + var laMetrics []*laTelegrafMetric + var tags map[interface{}]interface{} + tags = m["tags"].(map[interface{}]interface{}) + tagMap := make(map[string]string) + for k, v := range tags { + key := fmt.Sprintf("%s",k) + if key == "" { + continue + } + tagMap[key] = fmt.Sprintf("%s",v) + } + + //add azure monitor tags + 
tagMap[fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafTagClusterID)] = ResourceID + tagMap[fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafTagClusterName)] = ResourceName + + var fieldMap map[interface{}]interface{} + fieldMap = m["fields"].(map[interface{}]interface{}) + + tagJson, err := json.Marshal(&tagMap) + + if err != nil { + return nil, err + } + + for k, v := range fieldMap { + fv, ok := convert(v) + if !ok { + continue + } + i := m["timestamp"].(uint64) + laMetric := laTelegrafMetric{ + Origin: fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafMetricOriginSuffix), + //Namespace: fmt.Sprintf("%s/%s", TelegrafMetricNamespacePrefix, m["name"]), + Namespace: fmt.Sprintf("%s", m["name"]), + Name: fmt.Sprintf("%s",k), + Value: fv, + Tags: fmt.Sprintf("%s", tagJson), + CollectionTime: time.Unix(int64(i),0).Format(time.RFC3339), + Computer: Computer, //this is the collection agent's computer name, not necessarily to which computer the metric applies to + } + + //Log ("la metric:%v", laMetric) + laMetrics = append(laMetrics, &laMetric) + } + return laMetrics, nil +} + +//send metrics from Telegraf to LA. 1) Translate telegraf timeseries to LA metric(s) 2) Send it to LA as 'InsightsMetrics' fixed type +func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int { + var laMetrics []*laTelegrafMetric + + if ( (telegrafRecords== nil) || ! (len(telegrafRecords) > 0) ) { + Log("PostTelegrafMetricsToLA::Error:no timeseries to derive") + return output.FLB_OK + } + + for _, record := range telegrafRecords { + translatedMetrics, err := translateTelegrafMetrics(record) + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when translating telegraf metric to log analytics metric %q", err) + Log(message) + //SendException(message) //This will be too noisy + } + laMetrics = append(laMetrics, translatedMetrics...) 
+ } + + if ( (laMetrics == nil) || !(len(laMetrics) > 0) ) { + Log("PostTelegrafMetricsToLA::Info:no metrics derived from timeseries data") + return output.FLB_OK + } else { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Info:derived %v metrics from %v timeseries", len(laMetrics), len(telegrafRecords)) + Log(message) + } + + var metrics []laTelegrafMetric + var i int + + for i=0; i < len(laMetrics); i++ { + metrics = append(metrics, *laMetrics[i]) + } + + laTelegrafMetrics := InsightsMetricsBlob{ + DataType: InsightsMetricsDataType, + IPName: IPName, + DataItems: metrics} + + jsonBytes, err := json.Marshal(laTelegrafMetrics) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) + Log(message) + SendException(message) + return output.FLB_OK + } + + //Post metrics data to LA + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) + + //req.URL.Query().Add("api-version","2016-04-01") + + //set headers + req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) + + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } + + start := time.Now() + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. 
duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) + Log(message) + SendException(message) + UpdateNumTelegrafMetricsSentTelemetry(0, 1) + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("PostTelegrafMetricsToLA::Error:(retriable) Response Status %v Status Code %v", resp.Status, resp.StatusCode) + } + UpdateNumTelegrafMetricsSentTelemetry(0, 1) + return output.FLB_RETRY + } + + defer resp.Body.Close() + + numMetrics := len(laMetrics) + UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0) + Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed) + + return output.FLB_OK +} + +func UpdateNumTelegrafMetricsSentTelemetry(numMetricsSent int, numSendErrors int) { + ContainerLogTelemetryMutex.Lock() + TelegrafMetricsSentCount += float64(numMetricsSent) + TelegrafMetricsSendErrorCount += float64(numSendErrors) + ContainerLogTelemetryMutex.Unlock() +} + // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { @@ -285,7 +496,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { if len(dataItems) > 0 { logEntry := ContainerLogBlob{ - DataType: DataType, + DataType: ContainerLogDataType, IPName: IPName, DataItems: dataItems} @@ -384,14 +595,30 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { log.Fatalln(message) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + Log("OMSEndpoint %s", OMSEndpoint) + WorkspaceID = omsadminConf["WORKSPACE_ID"] ResourceID = os.Getenv("customResourceId") + if len(ResourceID) > 0 { + //AKS Scenario ResourceCentric = true - Log("OMS ResourceId=%s",ResourceID) + splitted := strings.Split(ResourceID, "/") + ResourceName = splitted[len(splitted)-1] + Log("ResourceCentric: True") + Log("ResourceID=%s",ResourceID) + Log("ResourceName=%s",ResourceID) + } + + if ResourceCentric == false { + //AKS-Engine/hybrid scenario + ResourceName = 
os.Getenv(ResourceNameEnv) + ResourceID = ResourceName + Log("ResourceCentric: False") + Log("ResourceID=%s",ResourceID) + Log("ResourceName=%s",ResourceName) } - Log("OMSEndpoint %s", OMSEndpoint) - + // Initialize image,name map refresh ticker containerInventoryRefreshInterval, err := strconv.Atoi(pluginConfig["container_inventory_refresh_interval"]) if err != nil { diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 133e0f039..dccc6774c 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -2,11 +2,13 @@ package main import ( "github.com/fluent/fluent-bit-go/output" + "github.com/Microsoft/ApplicationInsights-Go/appinsights" ) import ( "C" "strings" "unsafe" + "os" ) //export FLBPluginRegister @@ -19,8 +21,14 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") - InitializePlugin(ContainerLogPluginConfFilePath, agentVersion) + agentVersion := os.Getenv("AGENT_VERSION") + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "replicaset") == 0 { + Log("Using %s for plugin config \n", ReplicaSetContainerLogPluginConfFilePath) + InitializePlugin(ReplicaSetContainerLogPluginConfFilePath, agentVersion) + } else { + Log("Using %s for plugin config \n", DaemonSetContainerLogPluginConfFilePath) + InitializePlugin(DaemonSetContainerLogPluginConfFilePath, agentVersion) + } enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") @@ -51,9 +59,13 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { records = append(records, record) } - incomingTag := C.GoString(tag) - 
if strings.Contains(strings.ToLower(incomingTag), "oms.container.log.flbplugin") { - return PushToAppInsightsTraces(records) + incomingTag := strings.ToLower(C.GoString(tag)) + if strings.Contains(incomingTag, "oms.container.log.flbplugin") { + return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) + } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { + return PostTelegrafMetricsToLA(records) + } else if strings.Contains(incomingTag, "oms.container.log.telegraf.err") { + return PushToAppInsightsTraces(records, appinsights.Error, incomingTag) } return PostDataHelper(records) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index a64ca2218..f507e4ab9 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -9,11 +9,12 @@ import ( "time" "github.com/Microsoft/ApplicationInsights-Go/appinsights" + "github.com/Microsoft/ApplicationInsights-Go/appinsights/contracts" "github.com/fluent/fluent-bit-go/output" ) var ( - // FlushedRecordsCount indicates the number of flushed records in the current period + // FlushedRecordsCount indicates the number of flushed log records in the current period FlushedRecordsCount float64 // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period FlushedRecordsTimeTaken float64 @@ -27,19 +28,23 @@ var ( TelemetryClient appinsights.TelemetryClient // ContainerLogTelemetryTicker sends telemetry periodically ContainerLogTelemetryTicker *time.Ticker + //Tracks the number of telegraf metrics sent successfully between telemetry ticker periods (uses ContainerLogTelemetryTicker) + TelegrafMetricsSentCount float64 + //Tracks the number of send errors between telemetry ticker periods (uses ContainerLogTelemetryTicker) + TelegrafMetricsSendErrorCount float64 ) const ( clusterTypeACS = "ACS" clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - 
controllerTypeReplicaSet = "ReplicaSet" envAKSResourceID = "AKS_RESOURCE_ID" envACSResourceName = "ACS_RESOURCE_NAME" envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" + metricNameNumberofTelegrafMetricsSentSuccessfully = "TelegrafMetricsSentCount" + metricNameNumberofSendErrorsTelegrafMetrics = "TelegrafMetricsSendErrorCount" defaultTelemetryPushIntervalSeconds = 300 @@ -63,9 +68,14 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { for ; true; <-ContainerLogTelemetryTicker.C { SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 logRate := FlushedRecordsCount / float64(elapsed/time.Second) + telegrafMetricsSentCount := TelegrafMetricsSentCount + telegrafMetricsSendErrorCount := TelegrafMetricsSendErrorCount + TelegrafMetricsSentCount = 0.0 + TelegrafMetricsSendErrorCount = 0.0 FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 logLatencyMs := AgentLogProcessingMaxLatencyMs @@ -81,6 +91,8 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) logLatencyMetric.Properties["Container"] = logLatencyMsContainer TelemetryClient.Track(logLatencyMetric) + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofTelegrafMetricsSentSuccessfully, telegrafMetricsSentCount)) + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofSendErrorsTelegrafMetrics, telegrafMetricsSendErrorCount)) start = time.Now() } } @@ -129,7 +141,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { CommonProperties = make(map[string]string) 
CommonProperties["Computer"] = Computer CommonProperties["WorkspaceID"] = WorkspaceID - CommonProperties["ControllerType"] = controllerTypeDaemonSet + CommonProperties["ControllerType"] = os.Getenv("CONTROLLER_TYPE") CommonProperties["AgentVersion"] = agentVersion aksResourceID := os.Getenv(envAKSResourceID) @@ -164,13 +176,15 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -func PushToAppInsightsTraces(records []map[interface{}]interface{}) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) } traceEntry := strings.Join(logLines, "\n") - TelemetryClient.TrackTrace(traceEntry, 1) + traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) + traceTelemetryItem.Properties["tag"] = tag + TelemetryClient.Track(traceTelemetryItem) return output.FLB_OK } From 8cdf72437b3af7b49e6931602a2f2218deea8fbe Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 17 Apr 2019 19:20:57 -0700 Subject: [PATCH 88/88] Fix telemetry error for telegraf err count metric (#215) --- installer/scripts/TelegrafTCPErrorTelemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/scripts/TelegrafTCPErrorTelemetry.sh b/installer/scripts/TelegrafTCPErrorTelemetry.sh index 637af3969..2bd58b202 100644 --- a/installer/scripts/TelegrafTCPErrorTelemetry.sh +++ b/installer/scripts/TelegrafTCPErrorTelemetry.sh @@ -1,3 +1,3 @@ #!/bin/sh countErr=$(grep -iF "socket_writer" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') -echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file +echo "telegraf,Source=telegrafErrLog telegrafTCPWriteErrorCountTotal=${countErr}i" 
\ No newline at end of file