From 671a73000560199a5e0124256e3340d5790c6380 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 11 Sep 2019 17:49:51 -0700 Subject: [PATCH 001/117] changes --- installer/scripts/tomlparser.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index b66e1257e..cefdff4c6 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -136,6 +136,14 @@ def populateSettingValuesFromConfigMap(parsedConfig) @configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] puts "****************Start Config Processing********************" +teststr = "rashmi1 +rashmi2 +rashmi3 +rashmi4" + +puts "without_to_json: " + teststr +puts "with_to_json: " + teststr.to_json + if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it configMapSettings = {} From 03ca1bc3d443ade0b1979748041dc7b04189c185 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 11 Sep 2019 19:51:18 -0700 Subject: [PATCH 002/117] changes --- .../scripts/config-parse-error-logger.rb | 21 ++++++++ .../scripts/td-agent-bit-conf-customizer.rb | 3 +- .../scripts/tomlparser-prom-customconfig.rb | 13 ++--- installer/scripts/tomlparser.rb | 48 ++++++++----------- 4 files changed, 51 insertions(+), 34 deletions(-) create mode 100644 installer/scripts/config-parse-error-logger.rb diff --git a/installer/scripts/config-parse-error-logger.rb b/installer/scripts/config-parse-error-logger.rb new file mode 100644 index 000000000..5d6db8016 --- /dev/null +++ b/installer/scripts/config-parse-error-logger.rb @@ -0,0 +1,21 @@ +#!/usr/local/bin/ruby +# frozen_string_literal: true + +class ConfigParseErrorLogger + require "json" + + def initialize + end + + class << self + def logError(message) + begin + errorMessage = "config::error::" + message + jsonMessage = errorMessage.to_json + STDERR.puts jsonMessage + rescue => errorStr + puts "Error in ConfigParserErrorLogger::logError: #{errorStr}" + end + end + end +end diff --git a/installer/scripts/td-agent-bit-conf-customizer.rb b/installer/scripts/td-agent-bit-conf-customizer.rb index 1e62e3cc2..8fd60f968 100644 --- a/installer/scripts/td-agent-bit-conf-customizer.rb +++ b/installer/scripts/td-agent-bit-conf-customizer.rb @@ -1,4 +1,5 @@ #!/usr/local/bin/ruby +require_relative "ConfigParseErrorLogger" @td_agent_bit_conf_path = "/etc/opt/microsoft/docker-cimprov/td-agent-bit.conf" @@ -40,7 +41,7 @@ def substituteFluentBitPlaceHolders File.open(@td_agent_bit_conf_path, "w") { |file| file.puts new_contents } puts "config::Successfully substituted the placeholders in td-agent-bit.conf file" rescue => errorStr - puts "td-agent-bit-config-customizer: error while substituting values: #{errorStr}" + ConfigParseErrorLogger.logError("td-agent-bit-config-customizer: error while substituting values: #{errorStr}") end end diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index d44bf3342..c340c20d1 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -1,6 +1,7 @@ #!/usr/local/bin/ruby require_relative "tomlrb" +require_relative "ConfigParseErrorLogger" require "fileutils" @promConfigMapMountPath = "/etc/config/settings/prometheus-data-collection-settings" @@ -40,7 +41,7 @@ def parseConfigMap return nil end rescue => errorStr - puts "config::error::Exception while parsing toml config file for prometheus config: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing toml config file for prometheus config: #{errorStr}, using defaults") return nil end end @@ -66,7 +67,7 @@ def replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", ("monitor_kubernetes_pods = #{monitorKubernetesPods}")) new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", "") rescue => errorStr - puts "config::error::Exception while replacing default pod monitor settings: #{errorStr}" + ConfigParseErrorLogger.logError("Exception while replacing default pod monitor settings: #{errorStr}") end return new_contents end @@ -98,7 +99,7 @@ def createPrometheusPluginsWithNamespaceSetting(monitorKubernetesPods, monitorKu new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", pluginConfigsWithNamespaces) return new_contents rescue => errorStr - puts "config::error::Exception while creating prometheus input plugins to filter namespaces: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while creating prometheus input plugins to filter namespaces: #{errorStr}, using defaults") replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) end end @@ -184,7 +185,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Typecheck failed for prometheus config settings for replicaset, using defaults" end # end of type check condition rescue => errorStr - puts "config::error::Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults") setRsPromDefaults puts "****************End Prometheus Config Processing********************" end @@ -239,13 +240,13 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Typecheck failed for prometheus config settings for daemonset, using defaults" end # end of type check condition rescue => errorStr - puts "config::error::Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults") puts "****************End Prometheus Config Processing********************" end end # end of controller type check end else - puts "config::error:: Controller undefined while processing prometheus config, using defaults" + ConfigParseErrorLogger.logError("Controller undefined while processing prometheus config, using defaults") end end diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index cefdff4c6..593cea743 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -1,7 +1,8 @@ #!/usr/local/bin/ruby require_relative "tomlrb" -require 'json' +require_relative "ConfigParseErrorLogger" +require "json" @log_settings_config_map_mount_path = "/etc/config/settings/log-data-collection-settings" @agent_settings_config_map_mount_path = "/etc/config/settings/agent-settings" @@ -33,7 +34,7 @@ def parseConfigMap(path) return nil end rescue => errorStr - puts "config::error::Exception while parsing toml config file: #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while parsing toml config file: #{errorStr}, using defaults") @excludePath = "*_kube-system_*.log" return nil end @@ -70,7 +71,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - puts "config::error::Exception while reading config settings for stdout log collection - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config settings for stdout log collection - #{errorStr}, using defaults") end #Get stderr log config settings @@ -107,7 +108,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - puts "config::error:Exception while reading config settings for stderr log collection - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config settings for stderr log collection - #{errorStr}, using defaults") end #Get environment variables log config settings @@ -117,50 +118,43 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for cluster level environment variable collection" end rescue => errorStr - puts "config::error::Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults") end end begin if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil? - @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] + @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled] else - @enable_health_model = false + @enable_health_model = false end puts "enable_health_model = #{@enable_health_model}" rescue => errorStr - puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults" + ConfigParseErrorLogger.logError("Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults") @enable_health_model = false end end @configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] puts "****************Start Config Processing********************" -teststr = "rashmi1 -rashmi2 -rashmi3 -rashmi4" - -puts "without_to_json: " + teststr -puts "with_to_json: " + teststr.to_json if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it - configMapSettings = {} + configMapSettings = {} - #iterate over every *settings file and build a hash of settings - Dir["/etc/config/settings/*settings"].each{|file| - puts "Parsing File #{file}" - settings = parseConfigMap(file) - if !settings.nil? - configMapSettings = configMapSettings.merge(settings) - end - } + #iterate over every *settings file and build a hash of settings + Dir["/etc/config/settings/*settings"].each { |file| + puts "Parsing File #{file}" + settings = parseConfigMap(file) + if !settings.nil? + configMapSettings = configMapSettings.merge(settings) + end + } if !configMapSettings.nil? populateSettingValuesFromConfigMap(configMapSettings) end else - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" @excludePath = "*_kube-system_*.log" end @@ -186,13 +180,13 @@ def populateSettingValuesFromConfigMap(parsedConfig) file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n") file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n") file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n") - #health_model settings + #health_model settings file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n") # Close file after writing all environment variables file.close puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " puts "****************End Config Processing********************" else - puts "config::error::Exception while opening file for writing config environment variables" + ConfigParseErrorLogger.logError("Exception while opening file for writing config environment variables") puts "****************End Config Processing********************" end From ff9b29986bff35608b3829544bf7aaf42d2e8e03 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 11 Sep 2019 21:28:01 -0700 Subject: [PATCH 003/117] changes --- installer/conf/td-agent-bit.conf | 2 +- installer/datafiles/base_container.data | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 4e3de6c46..bc66c9bfc 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -51,7 +51,7 @@ [FILTER] Name grep Match oms.container.log.flbplugin.* - Exclude log E! [\[]inputs.prometheus[\]] + # Exclude log E! [\[]inputs.prometheus[\]] [OUTPUT] Name oms diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 3dc1a18cd..da611e641 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -115,6 +115,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root /opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root /opt/td-agent-bit-conf-customizer.rb; installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root +/opt/config-parse-error-logger.rb; installer/scripts/config-parse-error-logger.rb; 755; root; root From cb7a7fe26834a32dc5a281d32df3f4731970c051 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 11 Sep 2019 21:41:54 -0700 Subject: [PATCH 004/117] changes --- installer/datafiles/base_container.data | 2 +- .../{config-parse-error-logger.rb => ConfigParseErrorLogger.rb} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename installer/scripts/{config-parse-error-logger.rb => ConfigParseErrorLogger.rb} (100%) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index da611e641..7accc02fb 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -115,7 +115,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser.rb; installer/scripts/tomlparser.rb; 755; root; root /opt/tomlparser-prom-customconfig.rb; installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root /opt/td-agent-bit-conf-customizer.rb; installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root -/opt/config-parse-error-logger.rb; installer/scripts/config-parse-error-logger.rb; 755; root; root +/opt/ConfigParseErrorLogger.rb; installer/scripts/ConfigParseErrorLogger.rb; 755; root; root diff --git a/installer/scripts/config-parse-error-logger.rb b/installer/scripts/ConfigParseErrorLogger.rb similarity index 100% rename from installer/scripts/config-parse-error-logger.rb rename to installer/scripts/ConfigParseErrorLogger.rb From add3a2a46115e8e32e978c9cfb9e38ddf4e3e5b3 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 11 Sep 2019 21:51:41 -0700 Subject: [PATCH 005/117] changes --- installer/conf/td-agent-bit.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index bc66c9bfc..90f106ebc 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -51,7 +51,6 @@ [FILTER] Name grep Match oms.container.log.flbplugin.* - # Exclude log E! [\[]inputs.prometheus[\]] [OUTPUT] Name oms From 428885a2f61d86145ca7d8547123cbd62c2b2214 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 11 Sep 2019 22:26:25 -0700 Subject: [PATCH 006/117] changes --- installer/scripts/tomlparser-prom-customconfig.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index c340c20d1..d365c87e5 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -182,7 +182,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Successfully created telemetry file for replicaset" end else - puts "config::Typecheck failed for prometheus config settings for replicaset, using defaults" + ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for replicaset, using defaults") end # end of type check condition rescue => errorStr ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults") From 5e120f3e52849232d2667e9cff26634a7233cb5b Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 10:05:18 -0700 Subject: [PATCH 007/117] env changes --- installer/scripts/tomlparser.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 593cea743..99a5569a8 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -113,10 +113,10 @@ def populateSettingValuesFromConfigMap(parsedConfig) #Get environment variables log config settings begin - if !parsedConfig[:log_collection_settings][:env_var].nil? && !parsedConfig[:log_collection_settings][:env_var][:enabled].nil? + #if !parsedConfig[:log_collection_settings][:env_var].nil? && !parsedConfig[:log_collection_settings][:env_var][:enabled].nil? @collectClusterEnvVariables = parsedConfig[:log_collection_settings][:env_var][:enabled] puts "config::Using config map setting for cluster level environment variable collection" - end + #end rescue => errorStr ConfigParseErrorLogger.logError("Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults") end From eaec28bde040e2269018140dce92c55ad82a2532 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 10:18:01 -0700 Subject: [PATCH 008/117] changes --- installer/scripts/tomlparser.rb | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 99a5569a8..a1e5bc57f 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -66,7 +66,11 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end puts "config::Using config map setting for stdout log collection to exclude namespace" + else + ConfigParseErrorLogger.logError("Config setting for stdout exclude_namespaces is not an array of strings, using defaults") end + else + ConfigParseErrorLogger.logError("Config setting for stdout exclude_namespaces is not an array, using defaults") end end end @@ -103,7 +107,11 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end puts "config::Using config map setting for stderr log collection to exclude namespace" + else + ConfigParseErrorLogger.logError("Config setting for stderr exclude_namespaces is not an array of strings, using defaults") end + else + ConfigParseErrorLogger.logError("Config setting for stderr exclude_namespaces is not an array, using defaults") end end end @@ -113,10 +121,10 @@ def populateSettingValuesFromConfigMap(parsedConfig) #Get environment variables log config settings begin - #if !parsedConfig[:log_collection_settings][:env_var].nil? && !parsedConfig[:log_collection_settings][:env_var][:enabled].nil? + if !parsedConfig[:log_collection_settings][:env_var].nil? && !parsedConfig[:log_collection_settings][:env_var][:enabled].nil? @collectClusterEnvVariables = parsedConfig[:log_collection_settings][:env_var][:enabled] puts "config::Using config map setting for cluster level environment variable collection" - #end + end rescue => errorStr ConfigParseErrorLogger.logError("Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults") end From 4aa1e74bb646c2d381cdab49b0316d8f35501527 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 10:45:59 -0700 Subject: [PATCH 009/117] changes --- installer/scripts/tomlparser.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index a1e5bc57f..6bf888f54 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -50,12 +50,12 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for stdout log collection" stdoutNamespaces = parsedConfig[:log_collection_settings][:stdout][:exclude_namespaces] - #Clearing it, so that it can be overridden with the config map settings - @stdoutExcludeNamespaces.clear if @collectStdoutLogs && !stdoutNamespaces.nil? if stdoutNamespaces.kind_of?(Array) # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type if stdoutNamespaces.length > 0 && stdoutNamespaces[0].kind_of?(String) + #Clearing it, so that it can be overridden with the config map settings + @stdoutExcludeNamespaces.clear #Empty the array to use the values from configmap stdoutNamespaces.each do |namespace| if @stdoutExcludeNamespaces.empty? @@ -85,8 +85,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for stderr log collection" stderrNamespaces = parsedConfig[:log_collection_settings][:stderr][:exclude_namespaces] stdoutNamespaces = Array.new - #Clearing it, so that it can be overridden with the config map settings - @stderrExcludeNamespaces.clear + if @collectStderrLogs && !stderrNamespaces.nil? if stderrNamespaces.kind_of?(Array) if !@stdoutExcludeNamespaces.nil? && !@stdoutExcludeNamespaces.empty? @@ -94,6 +93,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) end # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type if stderrNamespaces.length > 0 && stderrNamespaces[0].kind_of?(String) + #Clearing it, so that it can be overridden with the config map settings + @stderrExcludeNamespaces.clear stderrNamespaces.each do |namespace| if @stderrExcludeNamespaces.empty? # To not append , for the first element From 186a4fbee0e127f86c981d26d8a078d74ecd6222 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 11:09:03 -0700 Subject: [PATCH 010/117] changes --- installer/scripts/tomlparser.rb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 6bf888f54..a1e5bc57f 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -50,12 +50,12 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for stdout log collection" stdoutNamespaces = parsedConfig[:log_collection_settings][:stdout][:exclude_namespaces] + #Clearing it, so that it can be overridden with the config map settings + @stdoutExcludeNamespaces.clear if @collectStdoutLogs && !stdoutNamespaces.nil? if stdoutNamespaces.kind_of?(Array) # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type if stdoutNamespaces.length > 0 && stdoutNamespaces[0].kind_of?(String) - #Clearing it, so that it can be overridden with the config map settings - @stdoutExcludeNamespaces.clear #Empty the array to use the values from configmap stdoutNamespaces.each do |namespace| if @stdoutExcludeNamespaces.empty? @@ -85,7 +85,8 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for stderr log collection" stderrNamespaces = parsedConfig[:log_collection_settings][:stderr][:exclude_namespaces] stdoutNamespaces = Array.new - + #Clearing it, so that it can be overridden with the config map settings + @stderrExcludeNamespaces.clear if @collectStderrLogs && !stderrNamespaces.nil? if stderrNamespaces.kind_of?(Array) if !@stdoutExcludeNamespaces.nil? && !@stdoutExcludeNamespaces.empty? @@ -93,8 +94,6 @@ def populateSettingValuesFromConfigMap(parsedConfig) end # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type if stderrNamespaces.length > 0 && stderrNamespaces[0].kind_of?(String) - #Clearing it, so that it can be overridden with the config map settings - @stderrExcludeNamespaces.clear stderrNamespaces.each do |namespace| if @stderrExcludeNamespaces.empty? # To not append , for the first element From b55d6777bafbb0bc255438762eeeebec06643705 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 11:20:11 -0700 Subject: [PATCH 011/117] reverting --- installer/scripts/tomlparser.rb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index a1e5bc57f..593cea743 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -66,11 +66,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end puts "config::Using config map setting for stdout log collection to exclude namespace" - else - ConfigParseErrorLogger.logError("Config setting for stdout exclude_namespaces is not an array of strings, using defaults") end - else - ConfigParseErrorLogger.logError("Config setting for stdout exclude_namespaces is not an array, using defaults") end end end @@ -107,11 +103,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end puts "config::Using config map setting for stderr log collection to exclude namespace" - else - ConfigParseErrorLogger.logError("Config setting for stderr exclude_namespaces is not an array of strings, using defaults") end - else - ConfigParseErrorLogger.logError("Config setting for stderr exclude_namespaces is not an array, using defaults") end end end From 611a7b1d01f3bd9bc39daa7e7e71c3b11e34065c Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 15:11:49 -0700 Subject: [PATCH 012/117] changes --- source/code/go/src/plugins/out_oms.go | 1 + source/code/go/src/plugins/telemetry.go | 30 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index e9e7124b7..654772f32 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -61,6 +61,7 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { incomingTag := strings.ToLower(C.GoString(tag)) if strings.Contains(incomingTag, "oms.container.log.flbplugin") { + // This will also include routing to send data to OMS workspace for config errors return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { return PostTelegrafMetricsToLA(records) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4f22b8c03..78c56ad65 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -56,6 +56,11 @@ const ( eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) +const ( + ConfigError = iota + ScrapingError +) + // SendContainerLogPluginMetrics is a go-routine that flushes the data periodically (every 5 mins to App Insights) func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) @@ -194,11 +199,36 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { return 0, nil } +// PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA +func PostConfigErrorstoLA(record map[interface{}]interface{}, errorType const) { + configErrorHash := make(map[string]struct{}) + promScrapeErrorHash := make(map[string]struct{}) + + logRecordString = ToString(record["Log"] + + if (errorType == ConfigError) { + Log("configError\n") + Log(logRecordString) + Log("\n") + } else { + Log("scrapingError\n") + Log(after(logRecordString, "[inputs.prometheus]: ") + Log("\n") + } +} + // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) + // If record contains config error or prometheus scraping errors send it to ****** table + var logEntry = ToString(record["log"]) + if (strings.Contains(logEntry, "config::error") { + PostConfigErrorstoLA(record, ConfigError) + } else if (strings.Contains(logEntry, "E! [inputs.prometheus]")) { + PostConfigErrorstoLA(record, ScrapingError) + } } traceEntry := strings.Join(logLines, "\n") From 71c458a21b3090d8dba51ffb23b4ff6d166a5a9e Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 15:16:56 -0700 Subject: [PATCH 013/117] cahnges --- source/code/go/src/plugins/telemetry.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 78c56ad65..9b2cafaa7 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -56,8 +56,10 @@ const ( eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) +type ErrorType int + const ( - ConfigError = iota + ConfigError ErrorType = iota ScrapingError ) @@ -200,13 +202,13 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA -func PostConfigErrorstoLA(record map[interface{}]interface{}, errorType const) { +func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) { configErrorHash := make(map[string]struct{}) promScrapeErrorHash := make(map[string]struct{}) logRecordString = ToString(record["Log"] - if (errorType == ConfigError) { + if (errType == ConfigError) { Log("configError\n") Log(logRecordString) Log("\n") From 2a9380d94ec6e6801310c4bb4c2516e348c1051e Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 15:19:53 -0700 Subject: [PATCH 014/117] changes --- source/code/go/src/plugins/telemetry.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 9b2cafaa7..fb60d5c07 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -206,15 +206,15 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) promScrapeErrorHash := make(map[string]struct{}) - logRecordString = ToString(record["Log"] + logRecordString = ToString(record["Log"]) - if (errType == ConfigError) { - Log("configError\n") - Log(logRecordString) - Log("\n") + if errType == ConfigError { + Log("configError\n") + Log(logRecordString) + Log("\n") } else { Log("scrapingError\n") - Log(after(logRecordString, "[inputs.prometheus]: ") + Log(after(logRecordString, "[inputs.prometheus]: ")) Log("\n") } } @@ -226,9 +226,9 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve logLines = append(logLines, ToString(record["log"])) // If record contains config error or prometheus scraping errors send it to ****** table var logEntry = ToString(record["log"]) - if (strings.Contains(logEntry, "config::error") { + if strings.Contains(logEntry, "config::error") { PostConfigErrorstoLA(record, ConfigError) - } else if (strings.Contains(logEntry, "E! [inputs.prometheus]")) { + } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { PostConfigErrorstoLA(record, ScrapingError) } } From 41623f8c05cb1915fdab37b8e836effe37010f76 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 15:31:06 -0700 Subject: [PATCH 015/117] changes --- source/code/go/src/plugins/telemetry.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index fb60d5c07..2a87edc36 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -206,7 +206,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) promScrapeErrorHash := make(map[string]struct{}) - logRecordString = ToString(record["Log"]) + var logRecordString = ToString(record["log"]) if errType == ConfigError { Log("configError\n") @@ -214,7 +214,10 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) Log("\n") } else { Log("scrapingError\n") - Log(after(logRecordString, "[inputs.prometheus]: ")) + var scrapingSplitString = string.Split(logRecordString, "[inputs.prometheus]: ") + if scrapingSplitString != nil && len(scrapingSplitString) == 2 { + Log(scrapingSplitString[1]) + } Log("\n") } } From 1ff772e34d82407d137ea3a5f7ff4c81ce9ca8d5 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 15:55:41 -0700 Subject: [PATCH 016/117] changes --- source/code/go/src/plugins/telemetry.go | 1 + 1 file changed, 1 insertion(+) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 2a87edc36..52f99444d 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -7,6 +7,7 @@ import ( "strconv" "strings" "time" + "fmt" "github.com/Microsoft/ApplicationInsights-Go/appinsights" "github.com/Microsoft/ApplicationInsights-Go/appinsights/contracts" From ac6dd3f2b9de630a52860039cfbc91b6f2199d2d Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 15:58:04 -0700 Subject: [PATCH 017/117] changes --- source/code/go/src/plugins/telemetry.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 52f99444d..e077adb38 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -7,7 +7,6 @@ import ( "strconv" "strings" "time" - "fmt" "github.com/Microsoft/ApplicationInsights-Go/appinsights" "github.com/Microsoft/ApplicationInsights-Go/appinsights/contracts" @@ -215,7 +214,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) Log("\n") } else { Log("scrapingError\n") - var scrapingSplitString = string.Split(logRecordString, "[inputs.prometheus]: ") + var scrapingSplitString = strings.Split(logRecordString, "[inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { Log(scrapingSplitString[1]) } From 460953d1d7f5f8204abffae138750cf926060eb5 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 16:08:14 -0700 Subject: [PATCH 018/117] changes --- source/code/go/src/plugins/telemetry.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index e077adb38..18c439efc 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -211,12 +211,14 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) if errType == ConfigError { Log("configError\n") Log(logRecordString) + configErrorHash[logRecordString] = struct{}{} Log("\n") } else { Log("scrapingError\n") var scrapingSplitString = strings.Split(logRecordString, "[inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - Log(scrapingSplitString[1]) + var splitString = Log(scrapingSplitString[1]) + promScrapeErrorHash[splitString] = struct{}{} } Log("\n") } From 4e69aa28db3ac49098acd0d8ca00bed8c745c2c6 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 16:10:22 -0700 Subject: [PATCH 019/117] changes --- source/code/go/src/plugins/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 18c439efc..4f159d59e 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -217,7 +217,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) Log("scrapingError\n") var scrapingSplitString = strings.Split(logRecordString, "[inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - var splitString = Log(scrapingSplitString[1]) + var splitString = scrapingSplitString[1] promScrapeErrorHash[splitString] = struct{}{} } Log("\n") From 4656efe6d40714a46d777b0832057c6152e3a4be Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 16:22:30 -0700 Subject: [PATCH 020/117] changes --- source/code/go/src/plugins/telemetry.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 4f159d59e..a30f981de 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -209,9 +209,9 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) var logRecordString = ToString(record["log"]) if errType == ConfigError { - Log("configError\n") - Log(logRecordString) + Log("configErrorHash\n") configErrorHash[logRecordString] = struct{}{} + Log(configErrorHash) Log("\n") } else { Log("scrapingError\n") @@ -219,6 +219,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] promScrapeErrorHash[splitString] = struct{}{} + Log(promScrapeErrorHash) } Log("\n") } From a0b4256c2b232f840da83f89a30df8f7ba443510 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 16:27:09 -0700 Subject: [PATCH 021/117] changes --- source/code/go/src/plugins/telemetry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index a30f981de..117a9d958 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -211,7 +211,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) if errType == ConfigError { Log("configErrorHash\n") configErrorHash[logRecordString] = struct{}{} - Log(configErrorHash) + Log(logRecordString) Log("\n") } else { Log("scrapingError\n") @@ -219,7 +219,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] promScrapeErrorHash[splitString] = struct{}{} - Log(promScrapeErrorHash) + Log(splitString) } Log("\n") } From 7ff3ced1a887ce9b61519cbd763aa49af58fbcc0 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 16:46:22 -0700 Subject: [PATCH 022/117] changes --- source/code/go/src/plugins/telemetry.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 117a9d958..30957b796 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -56,9 +56,11 @@ const ( eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) +// ErrorType to be used as enum type ErrorType int const ( + // ErrorType to be used as enum for ConfigError and ScrapingError ConfigError ErrorType = iota ScrapingError ) @@ -208,20 +210,26 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) var logRecordString = ToString(record["log"]) - if errType == ConfigError { + switch errType { + case ConfigError: Log("configErrorHash\n") configErrorHash[logRecordString] = struct{}{} Log(logRecordString) Log("\n") - } else { - Log("scrapingError\n") + + case ScrapingError: var scrapingSplitString = strings.Split(logRecordString, "[inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - var splitString = scrapingSplitString[1] + var splitString0 = scrapingSplitString[0] + var splitString1 = scrapingSplitString[1] promScrapeErrorHash[splitString] = struct{}{} - Log(splitString) + Log("scrapingError-0\n") + Log(splitString0) + Log("\n") + Log("scrapingError-1\n") + Log(splitString1) + Log("\n") } - Log("\n") } } From 32549c28e93ce739ad1e967d4df2b5a2ef36147c Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 12 Sep 2019 16:48:06 -0700 Subject: [PATCH 023/117] changes --- source/code/go/src/plugins/telemetry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 30957b796..03eb7da1c 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -206,7 +206,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) { configErrorHash := make(map[string]struct{}) - promScrapeErrorHash := make(map[string]struct{}) + // promScrapeErrorHash := make(map[string]struct{}) var logRecordString = ToString(record["log"]) @@ -222,7 +222,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString0 = scrapingSplitString[0] var splitString1 = scrapingSplitString[1] - promScrapeErrorHash[splitString] = struct{}{} + // promScrapeErrorHash[splitString] = struct{}{} Log("scrapingError-0\n") Log(splitString0) Log("\n") From 11997cf94b7552b5e920d2df69f969ac650cb7b1 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 12:18:07 -0700 Subject: [PATCH 024/117] changes --- source/code/go/src/plugins/telemetry.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 03eb7da1c..d13e09bb8 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -208,6 +208,9 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) // promScrapeErrorHash := make(map[string]struct{}) + var logRecord = ToString(record) + Log("LogRecord\n") + Log(logRecord) var logRecordString = ToString(record["log"]) switch errType { From 00737d7e25be0aeadf6f0428587de7aea7e88bd8 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 13:30:59 -0700 Subject: [PATCH 025/117] changes --- source/code/go/src/plugins/telemetry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index d13e09bb8..dcb42d591 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -208,9 +208,9 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) // promScrapeErrorHash := make(map[string]struct{}) - var logRecord = ToString(record) + //var logRecord = ToString(record) Log("LogRecord\n") - Log(logRecord) + Log(record) var logRecordString = ToString(record["log"]) switch errType { From f0c40e230bca52151615a1b1f90576748a99a761 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 13:49:18 -0700 Subject: [PATCH 026/117] changes --- source/code/go/src/plugins/telemetry.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index dcb42d591..d7372ae4e 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -208,9 +208,9 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) // promScrapeErrorHash := make(map[string]struct{}) - //var logRecord = ToString(record) - Log("LogRecord\n") - Log(record) + var logRecord = ToString(record["log"]["log"]) + Log("LogRecordlog\n") + Log(logRecord) var logRecordString = ToString(record["log"]) switch errType { From d6292c3733bc0b38dd50580387399172e2a58788 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 14:00:43 -0700 Subject: [PATCH 027/117] changes --- source/code/go/src/plugins/telemetry.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index d7372ae4e..5f53f2e7a 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -208,7 +208,8 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) // promScrapeErrorHash := make(map[string]struct{}) - var logRecord = ToString(record["log"]["log"]) + var test = record["log"] + var logRecord = ToString(test["log"]) Log("LogRecordlog\n") Log(logRecord) var logRecordString = ToString(record["log"]) From 67636c3ed75a8ee78bb8d0534564aa46cebf7c10 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 14:07:42 -0700 Subject: [PATCH 028/117] changes --- source/code/go/src/plugins/telemetry.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 5f53f2e7a..240d8673e 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -208,10 +208,15 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) configErrorHash := make(map[string]struct{}) // promScrapeErrorHash := make(map[string]struct{}) - var test = record["log"] - var logRecord = ToString(test["log"]) - Log("LogRecordlog\n") - Log(logRecord) + Log("Iterating\n") + for k, v := range record { + Log("key[%s] value[%s]\n", k, v) + } + Log("Done Iterating\n") + // var test = record["log"] + // var logRecord = ToString(test["log"]) + // Log("LogRecordlog\n") + // Log(logRecord) var logRecordString = ToString(record["log"]) switch errType { From 938ffc80fb0b701f3031dc45ff87068a61c3fd81 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 15:03:51 -0700 Subject: [PATCH 029/117] changes --- installer/conf/td-agent-bit.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 90f106ebc..6a1bf3e3e 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -28,6 +28,7 @@ Path /var/log/containers/omsagent*.log DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db DB.Sync Off + Parser docker Mem_Buf_Limit 1m Path_Key filepath Skip_Long_Lines On From 366b1e2dda45a5b28ca6e1bd64cd290cb4ba7a17 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 15:39:18 -0700 Subject: [PATCH 030/117] changes --- source/code/go/src/plugins/telemetry.go | 43 +++++++++++++------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 240d8673e..1381edaac 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -206,38 +206,39 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) { configErrorHash := make(map[string]struct{}) - // promScrapeErrorHash := make(map[string]struct{}) + promScrapeErrorHash := make(map[string]struct{}) - Log("Iterating\n") - for k, v := range record { - Log("key[%s] value[%s]\n", k, v) - } - Log("Done Iterating\n") - // var test = record["log"] - // var logRecord = ToString(test["log"]) - // Log("LogRecordlog\n") - // Log(logRecord) + // Log("Iterating\n") + // for k, v := range record { + // Log("key[%s] value[%s]\n", k, v) + // } + // Log("Done Iterating\n") var logRecordString = ToString(record["log"]) switch errType { case ConfigError: Log("configErrorHash\n") configErrorHash[logRecordString] = struct{}{} - Log(logRecordString) + for k, v := range configErrorHash { + Log("key[%s] value[%s]\n", k, v) + } + // Log(logRecordString) Log("\n") case ScrapingError: - var scrapingSplitString = strings.Split(logRecordString, "[inputs.prometheus]: ") + // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash + var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - var splitString0 = scrapingSplitString[0] - var splitString1 = scrapingSplitString[1] - // promScrapeErrorHash[splitString] = struct{}{} - Log("scrapingError-0\n") - Log(splitString0) - Log("\n") - Log("scrapingError-1\n") - Log(splitString1) - Log("\n") + var splitString = scrapingSplitString[1] + if splitString != nil { + promScrapeErrorHash[splitString] = struct{}{} + Log("promScrapeErrorHash\n") + for k, v := range promScrapeErrorHash { + Log("key[%s] value[%s]\n", k, v) + } + // Log(splitString1) + Log("\n") + } } } } From 760c8770d14a82d4eed349ce8e698d521a5468ac Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 13 Sep 2019 15:42:05 -0700 Subject: [PATCH 031/117] changes --- source/code/go/src/plugins/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 1381edaac..5e7736cfa 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -230,7 +230,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] - if splitString != nil { + if splitString != "" { promScrapeErrorHash[splitString] = struct{}{} Log("promScrapeErrorHash\n") for k, v := range promScrapeErrorHash { From f8abfae9239c6217e98310fc3f18fa7805efda7a Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 15:54:51 -0700 Subject: [PATCH 032/117] changes --- source/code/go/src/plugins/telemetry.go | 50 +++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 5e7736cfa..acea53438 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -2,7 +2,9 @@ package main import ( "encoding/base64" + "encoding/json" "errors" + "net/http" "os" "strconv" "strings" @@ -203,6 +205,19 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { return 0, nil } +// telegraf metric DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin +type laConfigError struct { + // 'golden' fields + Origin string `json:"Origin"` + Namespace string `json:"Namespace"` + Name string `json:"Name"` + Value float64 `json:"Value"` + Tags string `json:"Tags"` + // specific required fields for LA + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` +} + // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) { configErrorHash := make(map[string]struct{}) @@ -240,6 +255,41 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) Log("\n") } } + + Log("Posting custom log type to LA\n") + var laConfigErrorDataItems []*laConfigError + configError := laConfigError{ + Origin: "myOrigin", + Namespace: "myNamespace", + Name: "myName", + Value: 3.14, + Tags: "myTags", + CollectionTime: "2019-09-16T10:00:00.625Z", + Computer: "myComputer", + } + + //Log ("la metric:%v", laMetric) + laConfigErrorDataItems = append(laConfigErrorDataItems, &configError) + jsonBytes, err := json.Marshal(laConfigErrorDataItems) + + var uri = "https://17052a42-0cf3-4954-bbf1-30ef85e918a2.ods.opinsights.azure.com/api/logs?api-version=2016-04-01" + req, _ := http.NewRequest("POST", uri, jsonBytes) + req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) + req.Header.Set("Authorization", "SharedKey 17052a42-0cf3-4954-bbf1-30ef85e918a2:s3mrYKEufENFit8ANb7BitrDbZ9Y26xhxHwa877q9co=") + req.Header.Set("Log-Type", "MyRecordType") + req.Header.Set("time-generated-field", "2019-09-16T14:00:00.625Z") + req.Header.Set("Accept", "application/json") + + resp, err := HTTPClient.Do(req) + if err != nil { + Log("Error:") + Log(err) + Log("\n") + } else { + Log("response:") + Log(resp) + Log("\n") + } } } From 0100c05f2b37294c40c75d5c1aa415b61adb2514 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 15:59:47 -0700 Subject: [PATCH 033/117] changes --- source/code/go/src/plugins/telemetry.go | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index acea53438..e677303af 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -1,9 +1,11 @@ package main import ( + "bytes" "encoding/base64" "encoding/json" "errors" + "fmt" "net/http" "os" "strconv" @@ -273,7 +275,7 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) jsonBytes, err := json.Marshal(laConfigErrorDataItems) var uri = "https://17052a42-0cf3-4954-bbf1-30ef85e918a2.ods.opinsights.azure.com/api/logs?api-version=2016-04-01" - req, _ := http.NewRequest("POST", uri, jsonBytes) + req, _ := http.NewRequest("POST", uri, bytes.NewBuffer(jsonBytes)) req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) req.Header.Set("Authorization", "SharedKey 17052a42-0cf3-4954-bbf1-30ef85e918a2:s3mrYKEufENFit8ANb7BitrDbZ9Y26xhxHwa877q9co=") req.Header.Set("Log-Type", "MyRecordType") @@ -282,14 +284,19 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) resp, err := HTTPClient.Do(req) if err != nil { - Log("Error:") - Log(err) - Log("\n") - } else { - Log("response:") - Log(resp) - Log("\n") + message := fmt.Sprintf("PostConfigErrorsToLA::Error:when sending data \n") + Log(message) } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("PostConfigErrorsToLA::Error:Response Status %v Status Code %v", resp.Status, resp.StatusCode) + } + } else if resp.StatusCode == 200 { + Log("Success") + } + + defer resp.Body.Close() } } From 1fc53ea1c093fd4d5bfbc5603065a4d8d375eac7 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 19:38:18 -0700 Subject: [PATCH 034/117] changes --- source/code/go/src/plugins/oms.go | 18 +++- source/code/go/src/plugins/telemetry.go | 125 +++++++++++++----------- 2 files changed, 84 insertions(+), 59 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c5ad307d8..9ef56ff69 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -431,7 +431,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { - containerID, k8sNamespace := GetContainerIDK8sNamespaceFromFileName(ToString(record["filepath"])) + containerID, k8sNamespace, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) logEntrySource := ToString(record["stream"]) if strings.EqualFold(logEntrySource, "stdout") { @@ -552,11 +552,12 @@ func containsKey(currentMap map[string]bool, key string) bool { return c } -// GetContainerIDK8sNamespaceFromFileName Gets the container ID From the file Name +// GetContainerIDK8sNamespacePodNameFromFileName Gets the container ID, k8s namespace and pod name From the file Name // sample filename kube-proxy-dgcx7_kube-system_kube-proxy-8df7e49e9028b60b5b0d0547f409c455a9567946cf763267b7e6fa053ab8c182.log -func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) { +func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, string) { id := "" ns := "" + podName := "" start := strings.LastIndex(filename, "-") end := strings.LastIndex(filename, ".") @@ -576,7 +577,16 @@ func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) { ns = filename[start+1 : end] } - return id, ns + //start = strings.Index(filename, "_") + end = strings.Index(filename, "_") + + if start >= end || start == -1 || end == -1 { + podName = "" + } else { + podName = filename[0:end] + } + + return id, ns, podName } // InitializePlugin reads and populates plugin configuration diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index e677303af..d3414cc86 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -1,12 +1,9 @@ package main import ( - "bytes" "encoding/base64" - "encoding/json" "errors" "fmt" - "net/http" "os" "strconv" "strings" @@ -207,17 +204,62 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { return 0, nil } -// telegraf metric DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin +// Config Error message to be sent to Log Analytics type laConfigError struct { - // 'golden' fields - Origin string `json:"Origin"` - Namespace string `json:"Namespace"` - Name string `json:"Name"` - Value float64 `json:"Value"` - Tags string `json:"Tags"` - // specific required fields for LA - CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated - Computer string `json:"Computer"` + ConfigErrorMessage string `json:"ConfigErrorMessage"` + ContainerId string `json:"ContainerId"` + PodName string `json:"PodName"` + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` + ErrorTime string `json:"ErrorTime"` +} + +type configErrorDetails struct { + ContainerId string + PodName string + Computer string + ErrorTime string +} + +// Function to get config error log records after iterating through the two hashes +func getConfigErrorLogs(configErrorHash, promScrapeErrorHash) { + var laConfigErrorRecords []*laConfigError + + for k, v := range configErrorHash { + + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: "Computer", + PodName: v.PodName, + CollectionTime: + } + + Log("key[%s] value[%s]\n", k, v) + } + + for k, v := range fieldMap { + fv, ok := convert(v) + if !ok { + continue + } + i := m["timestamp"].(uint64) + laMetric := laTelegrafMetric{ + Origin: fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafMetricOriginSuffix), + //Namespace: fmt.Sprintf("%s/%s", TelegrafMetricNamespacePrefix, m["name"]), + Namespace: fmt.Sprintf("%s", m["name"]), + Name: fmt.Sprintf("%s", k), + Value: fv, + Tags: fmt.Sprintf("%s", tagJson), + CollectionTime: time.Unix(int64(i), 0).Format(time.RFC3339), + Computer: Computer, //this is the collection agent's computer name, not necessarily to which computer the metric applies to + } + + //Log ("la metric:%v", laMetric) + laMetrics = append(laMetrics, &laMetric) + } + return laMetrics, nil + } // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA @@ -231,11 +273,19 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) // } // Log("Done Iterating\n") var logRecordString = ToString(record["log"]) + var fileName = ToString(record["filepath"]) + var errorTimeStamp = ToString(record["time"]) + containerID, k8sNamespace, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) switch errType { case ConfigError: Log("configErrorHash\n") - configErrorHash[logRecordString] = struct{}{} + configErrorHash[logRecordString] = configErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: "", + ErrorTimeStamp: errorTimeStamp, + } for k, v := range configErrorHash { Log("key[%s] value[%s]\n", k, v) } @@ -248,7 +298,12 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] if splitString != "" { - promScrapeErrorHash[splitString] = struct{}{} + promScrapeErrorHash[splitString] = configErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: "", + ErrorTimeStamp: errorTimeStamp, + } Log("promScrapeErrorHash\n") for k, v := range promScrapeErrorHash { Log("key[%s] value[%s]\n", k, v) @@ -257,46 +312,6 @@ func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) Log("\n") } } - - Log("Posting custom log type to LA\n") - var laConfigErrorDataItems []*laConfigError - configError := laConfigError{ - Origin: "myOrigin", - Namespace: "myNamespace", - Name: "myName", - Value: 3.14, - Tags: "myTags", - CollectionTime: "2019-09-16T10:00:00.625Z", - Computer: "myComputer", - } - - //Log ("la metric:%v", laMetric) - laConfigErrorDataItems = append(laConfigErrorDataItems, &configError) - jsonBytes, err := json.Marshal(laConfigErrorDataItems) - - var uri = "https://17052a42-0cf3-4954-bbf1-30ef85e918a2.ods.opinsights.azure.com/api/logs?api-version=2016-04-01" - req, _ := http.NewRequest("POST", uri, bytes.NewBuffer(jsonBytes)) - req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) - req.Header.Set("Authorization", "SharedKey 17052a42-0cf3-4954-bbf1-30ef85e918a2:s3mrYKEufENFit8ANb7BitrDbZ9Y26xhxHwa877q9co=") - req.Header.Set("Log-Type", "MyRecordType") - req.Header.Set("time-generated-field", "2019-09-16T14:00:00.625Z") - req.Header.Set("Accept", "application/json") - - resp, err := HTTPClient.Do(req) - if err != nil { - message := fmt.Sprintf("PostConfigErrorsToLA::Error:when sending data \n") - Log(message) - } - - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("PostConfigErrorsToLA::Error:Response Status %v Status Code %v", resp.Status, resp.StatusCode) - } - } else if resp.StatusCode == 200 { - Log("Success") - } - - defer resp.Body.Close() } } From be03e65bdc1cdac63050d4c0b150b50b02972f7c Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 20:17:12 -0700 Subject: [PATCH 035/117] changes --- source/code/go/src/plugins/oms.go | 157 ++++++++++++++++++++++++ source/code/go/src/plugins/telemetry.go | 116 +---------------- 2 files changed, 159 insertions(+), 114 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 9ef56ff69..98af4c417 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -142,6 +142,42 @@ type ContainerLogBlob struct { DataItems []DataItem `json:"DataItems"` } +// Config Error message to be sent to Log Analytics +type laConfigError struct { + ConfigErrorMessage string `json:"ConfigErrorMessage"` + ContainerId string `json:"ContainerId"` + PodName string `json:"PodName"` + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` + ConfigErrorTime string `json:"ConfigErrorTime"` + ConfigErrorLevel string `json:"ConfigErrorLevel"` +} + +type configErrorDetails struct { + ContainerId string + PodName string + Computer string + ErrorTime string +} + +type ConfigErrorBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []DataItem `json:"DataItems"` +} + +// ErrorType to be used as enum +type ErrorType int + +const ( + // ErrorType to be used as enum for ConfigError and ScrapingError + ConfigError ErrorType = iota + ScrapingError +) + +// DataType for Config error +const ConfigErrorDataType = "CONFIG_ERROR_BLOB" + func createLogger() *log.Logger { var logfile *os.File path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" @@ -262,6 +298,126 @@ func convert(in interface{}) (float64, bool) { } } +// PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA +func getErrorHash(record map[interface{}]interface{}, errType ErrorType) { + // errorHash := make(map[string]configErrorDetails{}) + // promScrapeErrorHash := make(map[string]struct{}) + + // Log("Iterating\n") + // for k, v := range record + // Log("key[%s] value[%s]\n", k, v) + // } + // Log("Done Iterating\n") + var logRecordString = ToString(record["log"]) + var fileName = ToString(record["filepath"]) + var errorTimeStamp = ToString(record["time"]) + containerID, k8sNamespace, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + + switch errType { + case ConfigError: + // Log("configErrorHash\n") + ErrorHash[logRecordString] = configErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: "", + ErrorTimeStamp: errorTimeStamp, + } + + case ScrapingError: + // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash + var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") + if scrapingSplitString != nil && len(scrapingSplitString) == 2 { + var splitString = scrapingSplitString[1] + if splitString != "" { + ErrorHash[splitString] = configErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: "", + ErrorTimeStamp: errorTimeStamp, + } + } + } + } + return errorHash +} + +// Function to get config error log records after iterating through the two hashes +func getConfigErrorLogs(configErrorHash, promScrapeErrorHash) { + var laConfigErrorRecords []laConfigError + start := time.Now() + + for k, v := range configErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: "Computer", + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Error", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + // Log("key[%s] value[%s]\n", k, v) + } + + for k, v := range promScrapeErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: "Computer", + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Error", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + // Log("key[%s] value[%s]\n", k, v) + } + + if len(laConfigErrorRecords) > 0 { + configErrorEntry := ConfigErrorBlob{ + DataType: ConfigErrorDataType, + IPName: IPName, + DataItems: laConfigErrorRecords} + + marshalled, err := json.Marshal(configErrorEntry) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + return output.FLB_OK + } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + // if ResourceCentric == true { + // req.Header.Set("x-ms-AzureResourceId", ResourceID) + // } + + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) + + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + return output.FLB_RETRY + } + + defer resp.Body.Close() + numRecords := len(laConfigErrorRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + } +} + //Translates telegraf time series to one or more Azure loganalytics metric(s) func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { @@ -596,6 +752,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { StderrIgnoreNsSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) + ErrorHash := make(map[string]configErrorDetails{}) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index d3414cc86..6b5fd3826 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -3,7 +3,6 @@ package main import ( "encoding/base64" "errors" - "fmt" "os" "strconv" "strings" @@ -204,117 +203,6 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { return 0, nil } -// Config Error message to be sent to Log Analytics -type laConfigError struct { - ConfigErrorMessage string `json:"ConfigErrorMessage"` - ContainerId string `json:"ContainerId"` - PodName string `json:"PodName"` - CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated - Computer string `json:"Computer"` - ErrorTime string `json:"ErrorTime"` -} - -type configErrorDetails struct { - ContainerId string - PodName string - Computer string - ErrorTime string -} - -// Function to get config error log records after iterating through the two hashes -func getConfigErrorLogs(configErrorHash, promScrapeErrorHash) { - var laConfigErrorRecords []*laConfigError - - for k, v := range configErrorHash { - - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: "Computer", - PodName: v.PodName, - CollectionTime: - } - - Log("key[%s] value[%s]\n", k, v) - } - - for k, v := range fieldMap { - fv, ok := convert(v) - if !ok { - continue - } - i := m["timestamp"].(uint64) - laMetric := laTelegrafMetric{ - Origin: fmt.Sprintf("%s/%s", TelegrafMetricOriginPrefix, TelegrafMetricOriginSuffix), - //Namespace: fmt.Sprintf("%s/%s", TelegrafMetricNamespacePrefix, m["name"]), - Namespace: fmt.Sprintf("%s", m["name"]), - Name: fmt.Sprintf("%s", k), - Value: fv, - Tags: fmt.Sprintf("%s", tagJson), - CollectionTime: time.Unix(int64(i), 0).Format(time.RFC3339), - Computer: Computer, //this is the collection agent's computer name, not necessarily to which computer the metric applies to - } - - //Log ("la metric:%v", laMetric) - laMetrics = append(laMetrics, &laMetric) - } - return laMetrics, nil - -} - -// PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA -func PostConfigErrorstoLA(record map[interface{}]interface{}, errType ErrorType) { - configErrorHash := make(map[string]struct{}) - promScrapeErrorHash := make(map[string]struct{}) - - // Log("Iterating\n") - // for k, v := range record { - // Log("key[%s] value[%s]\n", k, v) - // } - // Log("Done Iterating\n") - var logRecordString = ToString(record["log"]) - var fileName = ToString(record["filepath"]) - var errorTimeStamp = ToString(record["time"]) - containerID, k8sNamespace, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) - - switch errType { - case ConfigError: - Log("configErrorHash\n") - configErrorHash[logRecordString] = configErrorDetails{ - ContainerId: containerID, - PodName: podName, - Computer: "", - ErrorTimeStamp: errorTimeStamp, - } - for k, v := range configErrorHash { - Log("key[%s] value[%s]\n", k, v) - } - // Log(logRecordString) - Log("\n") - - case ScrapingError: - // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash - var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") - if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - var splitString = scrapingSplitString[1] - if splitString != "" { - promScrapeErrorHash[splitString] = configErrorDetails{ - ContainerId: containerID, - PodName: podName, - Computer: "", - ErrorTimeStamp: errorTimeStamp, - } - Log("promScrapeErrorHash\n") - for k, v := range promScrapeErrorHash { - Log("key[%s] value[%s]\n", k, v) - } - // Log(splitString1) - Log("\n") - } - } - } -} - // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string @@ -323,9 +211,9 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve // If record contains config error or prometheus scraping errors send it to ****** table var logEntry = ToString(record["log"]) if strings.Contains(logEntry, "config::error") { - PostConfigErrorstoLA(record, ConfigError) + populateErrorHash(record, ConfigError) } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { - PostConfigErrorstoLA(record, ScrapingError) + populateErrorHash(record, ScrapingError) } } From edb7957fbd5efa1d5d2d328f398e3e1f23891364 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 20:21:57 -0700 Subject: [PATCH 036/117] changes --- source/code/go/src/plugins/oms.go | 24 +++++++++--------------- source/code/go/src/plugins/telemetry.go | 1 + 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 98af4c417..9f7bccd6c 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -299,15 +299,7 @@ func convert(in interface{}) (float64, bool) { } // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA -func getErrorHash(record map[interface{}]interface{}, errType ErrorType) { - // errorHash := make(map[string]configErrorDetails{}) - // promScrapeErrorHash := make(map[string]struct{}) - - // Log("Iterating\n") - // for k, v := range record - // Log("key[%s] value[%s]\n", k, v) - // } - // Log("Done Iterating\n") +func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { var logRecordString = ToString(record["log"]) var fileName = ToString(record["filepath"]) var errorTimeStamp = ToString(record["time"]) @@ -316,7 +308,7 @@ func getErrorHash(record map[interface{}]interface{}, errType ErrorType) { switch errType { case ConfigError: // Log("configErrorHash\n") - ErrorHash[logRecordString] = configErrorDetails{ + ConfigErrorHash[logRecordString] = configErrorDetails{ ContainerId: containerID, PodName: podName, Computer: "", @@ -329,7 +321,7 @@ func getErrorHash(record map[interface{}]interface{}, errType ErrorType) { if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] if splitString != "" { - ErrorHash[splitString] = configErrorDetails{ + PromScrapeErrorHash[splitString] = configErrorDetails{ ContainerId: containerID, PodName: podName, Computer: "", @@ -338,11 +330,10 @@ func getErrorHash(record map[interface{}]interface{}, errType ErrorType) { } } } - return errorHash } // Function to get config error log records after iterating through the two hashes -func getConfigErrorLogs(configErrorHash, promScrapeErrorHash) { +func flushConfigErrorRecords() { var laConfigErrorRecords []laConfigError start := time.Now() @@ -368,7 +359,7 @@ func getConfigErrorLogs(configErrorHash, promScrapeErrorHash) { PodName: v.PodName, CollectionTime: start.Format(time.RFC3339), ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Error", + ConfigErrorLevel: "Warning", } laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) // Log("key[%s] value[%s]\n", k, v) @@ -752,7 +743,10 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { StderrIgnoreNsSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) - ErrorHash := make(map[string]configErrorDetails{}) + // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container + // whereas the prometheus scrape error hash needs to be refreshed every hour + ConfigErrorHash := make(map[string]configErrorDetails{}) + PromScrapeErrorHash := make(map[string]configErrorDetails{}) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 6b5fd3826..6a71fd720 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -217,6 +217,7 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve } } + flushConfigErrorRecords() traceEntry := strings.Join(logLines, "\n") traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) traceTelemetryItem.Properties["tag"] = tag From ab7dc496bb82deba80bc17a2213d535e4e46fffb Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 20:23:40 -0700 Subject: [PATCH 037/117] changes --- source/code/go/src/plugins/oms.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 9f7bccd6c..4ea9a8644 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -311,7 +311,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { ConfigErrorHash[logRecordString] = configErrorDetails{ ContainerId: containerID, PodName: podName, - Computer: "", + Computer: Computer, ErrorTimeStamp: errorTimeStamp, } @@ -324,7 +324,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { PromScrapeErrorHash[splitString] = configErrorDetails{ ContainerId: containerID, PodName: podName, - Computer: "", + Computer: Computer, ErrorTimeStamp: errorTimeStamp, } } @@ -341,7 +341,7 @@ func flushConfigErrorRecords() { laConfigErrorRecord := laConfigError{ ConfigErrorMessage: k, ContainerId: v.ContainerId, - Computer: "Computer", + Computer: v.Computer, PodName: v.PodName, CollectionTime: start.Format(time.RFC3339), ConfigErrorTime: v.ErrorTimeStamp, @@ -355,7 +355,7 @@ func flushConfigErrorRecords() { laConfigErrorRecord := laConfigError{ ConfigErrorMessage: k, ContainerId: v.ContainerId, - Computer: "Computer", + Computer: v.Computer, PodName: v.PodName, CollectionTime: start.Format(time.RFC3339), ConfigErrorTime: v.ErrorTimeStamp, From 57556b7ea333f1c8a43fdc6da27651beeb3c6ce8 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 21:54:49 -0700 Subject: [PATCH 038/117] changes --- source/code/go/src/plugins/oms.go | 6 +++++- source/code/go/src/plugins/telemetry.go | 12 ++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 4ea9a8644..fde65eb07 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -88,6 +88,10 @@ var ( ContainerLogTelemetryMutex = &sync.Mutex{} // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset + // Config error hash + ConfigErrorHash map[string]configErrorDetails + // Prometheus scraping error hash + PromScrapeErrorHash map[string]configErrorDetails ) var ( @@ -701,7 +705,7 @@ func containsKey(currentMap map[string]bool, key string) bool { // GetContainerIDK8sNamespacePodNameFromFileName Gets the container ID, k8s namespace and pod name From the file Name // sample filename kube-proxy-dgcx7_kube-system_kube-proxy-8df7e49e9028b60b5b0d0547f409c455a9567946cf763267b7e6fa053ab8c182.log -func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, string) { +func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, string, string) { id := "" ns := "" podName := "" diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 6a71fd720..be061705c 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -57,13 +57,13 @@ const ( ) // ErrorType to be used as enum -type ErrorType int +// type ErrorType int -const ( - // ErrorType to be used as enum for ConfigError and ScrapingError - ConfigError ErrorType = iota - ScrapingError -) +// const ( +// // ErrorType to be used as enum for ConfigError and ScrapingError +// ConfigError ErrorType = iota +// ScrapingError +// ) // SendContainerLogPluginMetrics is a go-routine that flushes the data periodically (every 5 mins to App Insights) func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { From 0ce5bde4d27ec78f5255a2545897cf6449357b92 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 22:05:48 -0700 Subject: [PATCH 039/117] changes --- source/code/go/src/plugins/oms.go | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index fde65eb07..2d6e58985 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -89,9 +89,9 @@ var ( // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset // Config error hash - ConfigErrorHash map[string]configErrorDetails + ConfigErrorHash map[string]ConfigErrorDetails // Prometheus scraping error hash - PromScrapeErrorHash map[string]configErrorDetails + PromScrapeErrorHash map[string]ConfigErrorDetails ) var ( @@ -157,17 +157,17 @@ type laConfigError struct { ConfigErrorLevel string `json:"ConfigErrorLevel"` } -type configErrorDetails struct { - ContainerId string - PodName string - Computer string - ErrorTime string +type ConfigErrorDetails struct { + ContainerId string + PodName string + Computer string + ErrorTimeStamp string } type ConfigErrorBlob struct { - DataType string `json:"DataType"` - IPName string `json:"IPName"` - DataItems []DataItem `json:"DataItems"` + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []laConfigError `json:"DataItems"` } // ErrorType to be used as enum @@ -312,7 +312,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { switch errType { case ConfigError: // Log("configErrorHash\n") - ConfigErrorHash[logRecordString] = configErrorDetails{ + ConfigErrorHash[logRecordString] = ConfigErrorDetails{ ContainerId: containerID, PodName: podName, Computer: Computer, @@ -325,7 +325,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] if splitString != "" { - PromScrapeErrorHash[splitString] = configErrorDetails{ + PromScrapeErrorHash[splitString] = ConfigErrorDetails{ ContainerId: containerID, PodName: podName, Computer: Computer, @@ -341,7 +341,7 @@ func flushConfigErrorRecords() { var laConfigErrorRecords []laConfigError start := time.Now() - for k, v := range configErrorHash { + for k, v := range ConfigErrorHash { laConfigErrorRecord := laConfigError{ ConfigErrorMessage: k, ContainerId: v.ContainerId, @@ -355,7 +355,7 @@ func flushConfigErrorRecords() { // Log("key[%s] value[%s]\n", k, v) } - for k, v := range promScrapeErrorHash { + for k, v := range PromScrapeErrorHash { laConfigErrorRecord := laConfigError{ ConfigErrorMessage: k, ContainerId: v.ContainerId, @@ -380,7 +380,7 @@ func flushConfigErrorRecords() { message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) Log(message) SendException(message) - return output.FLB_OK + // return output.FLB_OK } req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") @@ -397,14 +397,14 @@ func flushConfigErrorRecords() { Log(message) Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) - return output.FLB_RETRY + // return output.FLB_RETRY } if resp == nil || resp.StatusCode != 200 { if resp != nil { Log("Status %s Status Code %d", resp.Status, resp.StatusCode) } - return output.FLB_RETRY + // return output.FLB_RETRY } defer resp.Body.Close() @@ -749,8 +749,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorHash := make(map[string]configErrorDetails{}) - PromScrapeErrorHash := make(map[string]configErrorDetails{}) + ConfigErrorHash := make(map[string]ConfigErrorDetails) + PromScrapeErrorHash := make(map[string]ConfigErrorDetails) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 87c39c63a409431e6d4d492c21b53be0f7d3b6d3 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 22:13:51 -0700 Subject: [PATCH 040/117] changes --- source/code/go/src/plugins/oms.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 2d6e58985..296a87ce2 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -305,9 +305,8 @@ func convert(in interface{}) (float64, bool) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { var logRecordString = ToString(record["log"]) - var fileName = ToString(record["filepath"]) var errorTimeStamp = ToString(record["time"]) - containerID, k8sNamespace, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) switch errType { case ConfigError: @@ -582,7 +581,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { DataUpdateMutex.Unlock() for _, record := range tailPluginRecords { - containerID, k8sNamespace, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + containerID, k8sNamespace, _ := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) logEntrySource := ToString(record["stream"]) if strings.EqualFold(logEntrySource, "stdout") { @@ -749,8 +748,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorHash := make(map[string]ConfigErrorDetails) - PromScrapeErrorHash := make(map[string]ConfigErrorDetails) + // ConfigErrorHash := make(map[string]ConfigErrorDetails) + // PromScrapeErrorHash := make(map[string]ConfigErrorDetails) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 5ee933baca514d863d3c5adeaa0b6e23c358d7ca Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 22:22:50 -0700 Subject: [PATCH 041/117] changes --- source/code/go/src/plugins/oms.go | 1 + 1 file changed, 1 insertion(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 296a87ce2..e598da5de 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -375,6 +375,7 @@ func flushConfigErrorRecords() { DataItems: laConfigErrorRecords} marshalled, err := json.Marshal(configErrorEntry) + Log("configerrorlogdata:\n" + marshalled) if err != nil { message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) Log(message) From f23bdd897cc4eb35fd9d3ea315ca389f366de494 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 16 Sep 2019 22:25:51 -0700 Subject: [PATCH 042/117] changes --- source/code/go/src/plugins/oms.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index e598da5de..56df8a803 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -375,7 +375,9 @@ func flushConfigErrorRecords() { DataItems: laConfigErrorRecords} marshalled, err := json.Marshal(configErrorEntry) - Log("configerrorlogdata:\n" + marshalled) + Log("configerrorlogdata-unmarshalled:\n" + ToString(configErrorEntry)) + + Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) if err != nil { message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) Log(message) From 40f21917eee3535582549f6afebd9abbae9773f9 Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 17:10:40 -0700 Subject: [PATCH 043/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 56df8a803..df5e25de5 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -751,8 +751,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - // ConfigErrorHash := make(map[string]ConfigErrorDetails) - // PromScrapeErrorHash := make(map[string]ConfigErrorDetails) + ConfigErrorHash := make(map[string]ConfigErrorDetails) + PromScrapeErrorHash := make(map[string]ConfigErrorDetails) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 758394e06cb063cf6dacb16fbf6a9d2e32f5331b Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 17:12:44 -0700 Subject: [PATCH 044/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index df5e25de5..0e10caefb 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -751,8 +751,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorHash := make(map[string]ConfigErrorDetails) - PromScrapeErrorHash := make(map[string]ConfigErrorDetails) + ConfigErrorHash = make(map[string]ConfigErrorDetails) + PromScrapeErrorHash = make(map[string]ConfigErrorDetails) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 16c788a4b7e616b28d43f899904051a449678b3b Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 18:21:23 -0700 Subject: [PATCH 045/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 0e10caefb..2a6778c30 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -730,13 +730,13 @@ func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, str ns = filename[start+1 : end] } - //start = strings.Index(filename, "_") + start = 0 end = strings.Index(filename, "_") if start >= end || start == -1 || end == -1 { podName = "" } else { - podName = filename[0:end] + podName = filename[start:end] } return id, ns, podName From 99a500c41f7d2e19750fa6c70947734caac5dec4 Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 18:48:47 -0700 Subject: [PATCH 046/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 2a6778c30..b333db80a 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -730,13 +730,13 @@ func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, str ns = filename[start+1 : end] } - start = 0 + start = strings.Index(filename, "/containers/") end = strings.Index(filename, "_") if start >= end || start == -1 || end == -1 { podName = "" } else { - podName = filename[start:end] + podName = filename[(start + len("/containers/") + 1) : end] } return id, ns, podName From 1c122a42df3ed125abf9d16f922c6e2c297381a7 Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 18:57:26 -0700 Subject: [PATCH 047/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index b333db80a..882d84a25 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -304,7 +304,7 @@ func convert(in interface{}) (float64, bool) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { - var logRecordString = ToString(record["log"]) + var logRecordString = record["log"] var errorTimeStamp = ToString(record["time"]) containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) @@ -736,7 +736,7 @@ func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, str if start >= end || start == -1 || end == -1 { podName = "" } else { - podName = filename[(start + len("/containers/") + 1) : end] + podName = filename[(start + len("/containers/")):end] } return id, ns, podName From f89e5999dfc448d85c61cafa37055132b231d534 Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 18:59:41 -0700 Subject: [PATCH 048/117] changes --- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 882d84a25..922db4c56 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -304,7 +304,7 @@ func convert(in interface{}) (float64, bool) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { - var logRecordString = record["log"] + var logRecordString = ToString(record["log"]) var errorTimeStamp = ToString(record["time"]) containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) From eb064b477daa9b783073c1be32ea73448a5386df Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 19:45:38 -0700 Subject: [PATCH 049/117] changes --- source/code/go/src/plugins/oms.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 922db4c56..e471a545a 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -311,6 +311,10 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { switch errType { case ConfigError: // Log("configErrorHash\n") + // Doing this since the error logger library is adding quotes around the string and a newline to the end because + // we are converting string to json to log lines in different lines as one record + logRecordString = logRecordString[1 : len(logRecordString)-1] + logRecordString = strings.TrimSuffix(logRecordString, "\n") ConfigErrorHash[logRecordString] = ConfigErrorDetails{ ContainerId: containerID, PodName: podName, From 48d7c31d5f1bd45fdccfe281d28571bbed6399dc Mon Sep 17 00:00:00 2001 From: rashmy Date: Tue, 17 Sep 2019 20:09:46 -0700 Subject: [PATCH 050/117] changes --- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index e471a545a..005aeca63 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -314,7 +314,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Doing this since the error logger library is adding quotes around the string and a newline to the end because // we are converting string to json to log lines in different lines as one record logRecordString = logRecordString[1 : len(logRecordString)-1] - logRecordString = strings.TrimSuffix(logRecordString, "\n") + // logRecordString = strings.TrimSuffix(logRecordString, "\n") ConfigErrorHash[logRecordString] = ConfigErrorDetails{ ContainerId: containerID, PodName: podName, From f7952b89d9eb540dcd15ab16f9b776a0f68f000d Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 18 Sep 2019 10:37:15 -0700 Subject: [PATCH 051/117] changes --- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 005aeca63..02d1657e8 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -313,7 +313,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Log("configErrorHash\n") // Doing this since the error logger library is adding quotes around the string and a newline to the end because // we are converting string to json to log lines in different lines as one record - logRecordString = logRecordString[1 : len(logRecordString)-1] + // logRecordString = logRecordString[1 : len(logRecordString)-1] // logRecordString = strings.TrimSuffix(logRecordString, "\n") ConfigErrorHash[logRecordString] = ConfigErrorDetails{ ContainerId: containerID, From 0c64f8d9696407f54372bcb4df8728a371d3b05c Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 18 Sep 2019 10:52:10 -0700 Subject: [PATCH 052/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 02d1657e8..c8d9ca7c4 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -313,8 +313,8 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Log("configErrorHash\n") // Doing this since the error logger library is adding quotes around the string and a newline to the end because // we are converting string to json to log lines in different lines as one record - // logRecordString = logRecordString[1 : len(logRecordString)-1] - // logRecordString = strings.TrimSuffix(logRecordString, "\n") + logRecordString = strings.TrimSuffix(logRecordString, "\n") + logRecordString = logRecordString[1 : len(logRecordString)-1] ConfigErrorHash[logRecordString] = ConfigErrorDetails{ ContainerId: containerID, PodName: podName, From b4d8c430e08bfc4afe15a2ef1476e8031becf6ed Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 18 Sep 2019 11:20:54 -0700 Subject: [PATCH 053/117] changes --- source/code/go/src/plugins/oms.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c8d9ca7c4..9aaaddee3 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -327,6 +327,8 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { var splitString = scrapingSplitString[1] + // Trimming the newline character at the end since this is being added as the key + splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { PromScrapeErrorHash[splitString] = ConfigErrorDetails{ ContainerId: containerID, From 8b9eb07ce6be173a24df7d0ffa7faac815b1683f Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 16:38:19 -0700 Subject: [PATCH 054/117] changes --- source/code/go/src/plugins/oms.go | 159 +++++++++++++++++------------- 1 file changed, 93 insertions(+), 66 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 9aaaddee3..d217eebc9 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -53,6 +53,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 +const kubeMonAgentConfigEventFlushInterval = 120 var ( // PluginConfiguration the plugins configuration @@ -97,6 +98,8 @@ var ( var ( // ContainerImageNameRefreshTicker updates the container image and names periodically ContainerImageNameRefreshTicker *time.Ticker + // KubeMonAgentConfigEventsSendTicker to send config events every hour + KubeMonAgentConfigEventsSendTicker *time.Ticker ) var ( @@ -148,11 +151,14 @@ type ContainerLogBlob struct { // Config Error message to be sent to Log Analytics type laConfigError struct { + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Computer string `json:"Computer"` + // Category string `json:"Category"` + // Level string `json:"Level"` + // Details string `json:"Details"` ConfigErrorMessage string `json:"ConfigErrorMessage"` ContainerId string `json:"ContainerId"` PodName string `json:"PodName"` - CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated - Computer string `json:"Computer"` ConfigErrorTime string `json:"ConfigErrorTime"` ConfigErrorLevel string `json:"ConfigErrorLevel"` } @@ -343,81 +349,99 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Function to get config error log records after iterating through the two hashes func flushConfigErrorRecords() { - var laConfigErrorRecords []laConfigError - start := time.Now() - - for k, v := range ConfigErrorHash { - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Error", + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + var laConfigErrorRecords []laConfigError + start := time.Now() + + for k, v := range ConfigErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: v.Computer, + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Error", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + // Log("key[%s] value[%s]\n", k, v) } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - // Log("key[%s] value[%s]\n", k, v) - } - - for k, v := range PromScrapeErrorHash { - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Warning", + + for k, v := range PromScrapeErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: v.Computer, + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Warning", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + // Log("key[%s] value[%s]\n", k, v) } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - // Log("key[%s] value[%s]\n", k, v) - } - if len(laConfigErrorRecords) > 0 { - configErrorEntry := ConfigErrorBlob{ - DataType: ConfigErrorDataType, - IPName: IPName, - DataItems: laConfigErrorRecords} + if len(laConfigErrorRecords) > 0 { + configErrorEntry := ConfigErrorBlob{ + DataType: ConfigErrorDataType, + IPName: IPName, + DataItems: laConfigErrorRecords} + + marshalled, err := json.Marshal(configErrorEntry) + Log("configerrorlogdata-unmarshalled:\n" + ToString(configErrorEntry)) + + Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + // return output.FLB_OK + } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - marshalled, err := json.Marshal(configErrorEntry) - Log("configerrorlogdata-unmarshalled:\n" + ToString(configErrorEntry)) + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) - Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - // return output.FLB_OK - } - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - // if ResourceCentric == true { - // req.Header.Set("x-ms-AzureResourceId", ResourceID) - // } + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) + // return output.FLB_RETRY + } - if err != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + // return output.FLB_RETRY + } - // return output.FLB_RETRY - } + defer resp.Body.Close() + numRecords := len(laConfigErrorRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + Log("PromScrapeErrorHash before:\n") + for k := range PromScrapeErrorHash { + Log(PromScrapeErrorHash[k]) } - // return output.FLB_RETRY - } - defer resp.Body.Close() - numRecords := len(laConfigErrorRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) + for k := range PromScrapeErrorHash { + delete(PromScrapeErrorHash, k) + } + + Log("PromScrapeErrorHash after:\n") + for k := range PromScrapeErrorHash { + Log(PromScrapeErrorHash[k]) + } + + } } } @@ -813,6 +837,9 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval) ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) + Log("kubeMonAgentConfigEventFlushInterval = %d \n", kubeMonAgentConfigEventFlushInterval) + KubeMonAgentConfigEventsSendTicker = time.NewTicker(time.Second * time.Duration(kubeMonAgentConfigEventFlushInterval)) + // Populate Computer field containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) if err != nil { From a6e62a9dcea9b483b4ad6eaf935a04c75959f92f Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 16:40:49 -0700 Subject: [PATCH 055/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d217eebc9..3f6fa5491 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -429,7 +429,7 @@ func flushConfigErrorRecords() { //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour Log("PromScrapeErrorHash before:\n") for k := range PromScrapeErrorHash { - Log(PromScrapeErrorHash[k]) + Log(ToString(PromScrapeErrorHash[k])) } for k := range PromScrapeErrorHash { @@ -438,7 +438,7 @@ func flushConfigErrorRecords() { Log("PromScrapeErrorHash after:\n") for k := range PromScrapeErrorHash { - Log(PromScrapeErrorHash[k]) + Log(ToString(PromScrapeErrorHash[k])) } } From 6cf40039840821a2b1f55acb111c2af8cf9cecee Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 17:40:11 -0700 Subject: [PATCH 056/117] changes --- source/code/go/src/plugins/oms.go | 155 ++++++++++++------------ source/code/go/src/plugins/telemetry.go | 5 +- 2 files changed, 80 insertions(+), 80 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 3f6fa5491..824c50b99 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -349,98 +349,95 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Function to get config error log records after iterating through the two hashes func flushConfigErrorRecords() { - for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - var laConfigErrorRecords []laConfigError - start := time.Now() - - for k, v := range ConfigErrorHash { - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Error", - } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - // Log("key[%s] value[%s]\n", k, v) - } - - for k, v := range PromScrapeErrorHash { - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Warning", - } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - // Log("key[%s] value[%s]\n", k, v) + var laConfigErrorRecords []laConfigError + start := time.Now() + + for k, v := range ConfigErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: v.Computer, + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Error", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + // Log("key[%s] value[%s]\n", k, v) + } + + for k, v := range PromScrapeErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: v.Computer, + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Warning", } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + // Log("key[%s] value[%s]\n", k, v) + } - if len(laConfigErrorRecords) > 0 { - configErrorEntry := ConfigErrorBlob{ - DataType: ConfigErrorDataType, - IPName: IPName, - DataItems: laConfigErrorRecords} + if len(laConfigErrorRecords) > 0 { + configErrorEntry := ConfigErrorBlob{ + DataType: ConfigErrorDataType, + IPName: IPName, + DataItems: laConfigErrorRecords} - marshalled, err := json.Marshal(configErrorEntry) - Log("configerrorlogdata-unmarshalled:\n" + ToString(configErrorEntry)) + marshalled, err := json.Marshal(configErrorEntry) + Log("configerrorlogdata-unmarshalled:\n" + ToString(configErrorEntry)) - Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - // return output.FLB_OK - } - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } + Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + // return output.FLB_OK + } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) - if err != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) - // return output.FLB_RETRY - } + // return output.FLB_RETRY + } - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) - } - // return output.FLB_RETRY + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) } + // return output.FLB_RETRY + } - defer resp.Body.Close() - numRecords := len(laConfigErrorRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) - - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - Log("PromScrapeErrorHash before:\n") - for k := range PromScrapeErrorHash { - Log(ToString(PromScrapeErrorHash[k])) - } + defer resp.Body.Close() + numRecords := len(laConfigErrorRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) - for k := range PromScrapeErrorHash { - delete(PromScrapeErrorHash, k) - } + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + Log("PromScrapeErrorHash before:\n") + for k := range PromScrapeErrorHash { + Log(ToString(PromScrapeErrorHash[k])) + } - Log("PromScrapeErrorHash after:\n") - for k := range PromScrapeErrorHash { - Log(ToString(PromScrapeErrorHash[k])) - } + for k := range PromScrapeErrorHash { + delete(PromScrapeErrorHash, k) + } + Log("PromScrapeErrorHash after:\n") + for k := range PromScrapeErrorHash { + Log(ToString(PromScrapeErrorHash[k])) } } } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index be061705c..34a37f500 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -217,7 +217,10 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve } } - flushConfigErrorRecords() + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + Log("Flushing config error records\n") + flushConfigErrorRecords() + } traceEntry := strings.Join(logLines, "\n") traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) traceTelemetryItem.Properties["tag"] = tag From 3914a0e4e2e3b48c81111ecfed527c95a64f27b8 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 17:56:21 -0700 Subject: [PATCH 057/117] changes --- source/code/go/src/plugins/oms.go | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 824c50b99..1f9d54ff9 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -53,7 +53,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 120 +const kubeMonAgentConfigEventFlushInterval = 300 var ( // PluginConfiguration the plugins configuration @@ -387,7 +387,6 @@ func flushConfigErrorRecords() { DataItems: laConfigErrorRecords} marshalled, err := json.Marshal(configErrorEntry) - Log("configerrorlogdata-unmarshalled:\n" + ToString(configErrorEntry)) Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) if err != nil { @@ -426,19 +425,9 @@ func flushConfigErrorRecords() { Log("Successfully flushed %d records in %s", numRecords, elapsed) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - Log("PromScrapeErrorHash before:\n") - for k := range PromScrapeErrorHash { - Log(ToString(PromScrapeErrorHash[k])) - } - for k := range PromScrapeErrorHash { delete(PromScrapeErrorHash, k) } - - Log("PromScrapeErrorHash after:\n") - for k := range PromScrapeErrorHash { - Log(ToString(PromScrapeErrorHash[k])) - } } } From 16df0ed16dc3b3a8a3efdd933e476e8be9386885 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 21:29:20 -0700 Subject: [PATCH 058/117] changes --- source/code/go/src/plugins/oms.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 1f9d54ff9..5625c1c46 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -349,6 +349,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Function to get config error log records after iterating through the two hashes func flushConfigErrorRecords() { + Log("In flushConfigErrorRecords\n") var laConfigErrorRecords []laConfigError start := time.Now() @@ -363,7 +364,7 @@ func flushConfigErrorRecords() { ConfigErrorLevel: "Error", } laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - // Log("key[%s] value[%s]\n", k, v) + Log("key[%s] value[%s]\n", k, v) } for k, v := range PromScrapeErrorHash { @@ -377,7 +378,7 @@ func flushConfigErrorRecords() { ConfigErrorLevel: "Warning", } laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - // Log("key[%s] value[%s]\n", k, v) + Log("key[%s] value[%s]\n", k, v) } if len(laConfigErrorRecords) > 0 { From f30fe44e84bb799eac56333451be3f991a765853 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 21:39:48 -0700 Subject: [PATCH 059/117] changes --- source/code/go/src/plugins/oms.go | 2 +- source/code/go/src/plugins/telemetry.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 5625c1c46..8246b83a0 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -53,7 +53,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 300 +const kubeMonAgentConfigEventFlushInterval = 120 var ( // PluginConfiguration the plugins configuration diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 34a37f500..8917eb8da 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -217,10 +217,10 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve } } - for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - Log("Flushing config error records\n") - flushConfigErrorRecords() - } + // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + // Log("Flushing config error records\n") + flushConfigErrorRecords() + // } traceEntry := strings.Join(logLines, "\n") traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) traceTelemetryItem.Properties["tag"] = tag From 40739f711ef4dbc62361b25afb3afe9d53a66516 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 21:53:28 -0700 Subject: [PATCH 060/117] changes --- source/code/go/src/plugins/oms.go | 67 +++++++++++++++++-------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 8246b83a0..62fb1b0a2 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -53,7 +53,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 120 +const kubeMonAgentConfigEventFlushInterval = 300 var ( // PluginConfiguration the plugins configuration @@ -310,37 +310,39 @@ func convert(in interface{}) (float64, bool) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { - var logRecordString = ToString(record["log"]) - var errorTimeStamp = ToString(record["time"]) - containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) - - switch errType { - case ConfigError: - // Log("configErrorHash\n") - // Doing this since the error logger library is adding quotes around the string and a newline to the end because - // we are converting string to json to log lines in different lines as one record - logRecordString = strings.TrimSuffix(logRecordString, "\n") - logRecordString = logRecordString[1 : len(logRecordString)-1] - ConfigErrorHash[logRecordString] = ConfigErrorDetails{ - ContainerId: containerID, - PodName: podName, - Computer: Computer, - ErrorTimeStamp: errorTimeStamp, - } + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + var logRecordString = ToString(record["log"]) + var errorTimeStamp = ToString(record["time"]) + containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + + switch errType { + case ConfigError: + // Log("configErrorHash\n") + // Doing this since the error logger library is adding quotes around the string and a newline to the end because + // we are converting string to json to log lines in different lines as one record + logRecordString = strings.TrimSuffix(logRecordString, "\n") + logRecordString = logRecordString[1 : len(logRecordString)-1] + ConfigErrorHash[logRecordString] = ConfigErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: Computer, + ErrorTimeStamp: errorTimeStamp, + } - case ScrapingError: - // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash - var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") - if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - var splitString = scrapingSplitString[1] - // Trimming the newline character at the end since this is being added as the key - splitString = strings.TrimSuffix(splitString, "\n") - if splitString != "" { - PromScrapeErrorHash[splitString] = ConfigErrorDetails{ - ContainerId: containerID, - PodName: podName, - Computer: Computer, - ErrorTimeStamp: errorTimeStamp, + case ScrapingError: + // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash + var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") + if scrapingSplitString != nil && len(scrapingSplitString) == 2 { + var splitString = scrapingSplitString[1] + // Trimming the newline character at the end since this is being added as the key + splitString = strings.TrimSuffix(splitString, "\n") + if splitString != "" { + PromScrapeErrorHash[splitString] = ConfigErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: Computer, + ErrorTimeStamp: errorTimeStamp, + } } } } @@ -872,4 +874,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { } else { Log("Running in replicaset. Disabling container enrichment caching & updates \n") } + + // Flush config error records every hour + go flushConfigErrorRecords() } From b8a13302ab33415e068f1311ce81b030525d7bbb Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 19 Sep 2019 21:54:31 -0700 Subject: [PATCH 061/117] changes --- source/code/go/src/plugins/oms.go | 198 +++++++++++++++--------------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 62fb1b0a2..54ab8de4c 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -310,39 +310,37 @@ func convert(in interface{}) (float64, bool) { // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { - for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - var logRecordString = ToString(record["log"]) - var errorTimeStamp = ToString(record["time"]) - containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) - - switch errType { - case ConfigError: - // Log("configErrorHash\n") - // Doing this since the error logger library is adding quotes around the string and a newline to the end because - // we are converting string to json to log lines in different lines as one record - logRecordString = strings.TrimSuffix(logRecordString, "\n") - logRecordString = logRecordString[1 : len(logRecordString)-1] - ConfigErrorHash[logRecordString] = ConfigErrorDetails{ - ContainerId: containerID, - PodName: podName, - Computer: Computer, - ErrorTimeStamp: errorTimeStamp, - } + var logRecordString = ToString(record["log"]) + var errorTimeStamp = ToString(record["time"]) + containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + + switch errType { + case ConfigError: + // Log("configErrorHash\n") + // Doing this since the error logger library is adding quotes around the string and a newline to the end because + // we are converting string to json to log lines in different lines as one record + logRecordString = strings.TrimSuffix(logRecordString, "\n") + logRecordString = logRecordString[1 : len(logRecordString)-1] + ConfigErrorHash[logRecordString] = ConfigErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: Computer, + ErrorTimeStamp: errorTimeStamp, + } - case ScrapingError: - // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash - var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") - if scrapingSplitString != nil && len(scrapingSplitString) == 2 { - var splitString = scrapingSplitString[1] - // Trimming the newline character at the end since this is being added as the key - splitString = strings.TrimSuffix(splitString, "\n") - if splitString != "" { - PromScrapeErrorHash[splitString] = ConfigErrorDetails{ - ContainerId: containerID, - PodName: podName, - Computer: Computer, - ErrorTimeStamp: errorTimeStamp, - } + case ScrapingError: + // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash + var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") + if scrapingSplitString != nil && len(scrapingSplitString) == 2 { + var splitString = scrapingSplitString[1] + // Trimming the newline character at the end since this is being added as the key + splitString = strings.TrimSuffix(splitString, "\n") + if splitString != "" { + PromScrapeErrorHash[splitString] = ConfigErrorDetails{ + ContainerId: containerID, + PodName: podName, + Computer: Computer, + ErrorTimeStamp: errorTimeStamp, } } } @@ -351,85 +349,87 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Function to get config error log records after iterating through the two hashes func flushConfigErrorRecords() { - Log("In flushConfigErrorRecords\n") - var laConfigErrorRecords []laConfigError - start := time.Now() - - for k, v := range ConfigErrorHash { - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Error", + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + Log("In flushConfigErrorRecords\n") + var laConfigErrorRecords []laConfigError + start := time.Now() + + for k, v := range ConfigErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: v.Computer, + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Error", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + Log("key[%s] value[%s]\n", k, v) } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - Log("key[%s] value[%s]\n", k, v) - } - - for k, v := range PromScrapeErrorHash { - laConfigErrorRecord := laConfigError{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Warning", + + for k, v := range PromScrapeErrorHash { + laConfigErrorRecord := laConfigError{ + ConfigErrorMessage: k, + ContainerId: v.ContainerId, + Computer: v.Computer, + PodName: v.PodName, + CollectionTime: start.Format(time.RFC3339), + ConfigErrorTime: v.ErrorTimeStamp, + ConfigErrorLevel: "Warning", + } + laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + Log("key[%s] value[%s]\n", k, v) } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - Log("key[%s] value[%s]\n", k, v) - } - if len(laConfigErrorRecords) > 0 { - configErrorEntry := ConfigErrorBlob{ - DataType: ConfigErrorDataType, - IPName: IPName, - DataItems: laConfigErrorRecords} + if len(laConfigErrorRecords) > 0 { + configErrorEntry := ConfigErrorBlob{ + DataType: ConfigErrorDataType, + IPName: IPName, + DataItems: laConfigErrorRecords} - marshalled, err := json.Marshal(configErrorEntry) + marshalled, err := json.Marshal(configErrorEntry) - Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - // return output.FLB_OK - } - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } + Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + // return output.FLB_OK + } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) - if err != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) - // return output.FLB_RETRY - } + // return output.FLB_RETRY + } - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + // return output.FLB_RETRY } - // return output.FLB_RETRY - } - defer resp.Body.Close() - numRecords := len(laConfigErrorRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) + defer resp.Body.Close() + numRecords := len(laConfigErrorRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - for k := range PromScrapeErrorHash { - delete(PromScrapeErrorHash, k) + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + for k := range PromScrapeErrorHash { + delete(PromScrapeErrorHash, k) + } } } } From eed00aa23f400b97398540a8b22b3b808321a57a Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 20 Sep 2019 15:29:44 -0700 Subject: [PATCH 062/117] changes --- source/code/go/src/plugins/oms.go | 17 ++++++++++++----- source/code/go/src/plugins/telemetry.go | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 54ab8de4c..099d8f62d 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -93,6 +93,8 @@ var ( ConfigErrorHash map[string]ConfigErrorDetails // Prometheus scraping error hash PromScrapeErrorHash map[string]ConfigErrorDetails + // EventHashUpdateMutex read and write mutex access to the event hash + EventHashUpdateMutex = &sync.Mutex{} ) var ( @@ -314,6 +316,8 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { var errorTimeStamp = ToString(record["time"]) containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) + Log("Updating config event hash - Locking for update \n ") + EventHashUpdateMutex.Lock() switch errType { case ConfigError: // Log("configErrorHash\n") @@ -345,6 +349,8 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { } } } + EventHashUpdateMutex.Unlock() + Log("Updating config event hash - Unlocked after update \n ") } // Function to get config error log records after iterating through the two hashes @@ -427,9 +433,9 @@ func flushConfigErrorRecords() { Log("Successfully flushed %d records in %s", numRecords, elapsed) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - for k := range PromScrapeErrorHash { - delete(PromScrapeErrorHash, k) - } + // for k := range PromScrapeErrorHash { + // delete(PromScrapeErrorHash, k) + // } } } } @@ -871,10 +877,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { populateExcludedStdoutNamespaces() populateExcludedStderrNamespaces() go updateContainerImageNameMaps() + + // Flush config error records every hour + go flushConfigErrorRecords() } else { Log("Running in replicaset. Disabling container enrichment caching & updates \n") } - // Flush config error records every hour - go flushConfigErrorRecords() } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 8917eb8da..dc025e648 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -219,7 +219,7 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { // Log("Flushing config error records\n") - flushConfigErrorRecords() + // flushConfigErrorRecords() // } traceEntry := strings.Join(logLines, "\n") traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) From 2b333e0d4812a40603c76eb90004dd739e6fde95 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 20 Sep 2019 15:56:11 -0700 Subject: [PATCH 063/117] changes --- source/code/go/src/plugins/oms.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 099d8f62d..326189ca8 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -360,6 +360,7 @@ func flushConfigErrorRecords() { var laConfigErrorRecords []laConfigError start := time.Now() + EventHashUpdateMutex.Lock() for k, v := range ConfigErrorHash { laConfigErrorRecord := laConfigError{ ConfigErrorMessage: k, @@ -371,7 +372,7 @@ func flushConfigErrorRecords() { ConfigErrorLevel: "Error", } laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - Log("key[%s] value[%s]\n", k, v) + // Log("key[%s] value[%s]\n", k, v) } for k, v := range PromScrapeErrorHash { @@ -385,8 +386,9 @@ func flushConfigErrorRecords() { ConfigErrorLevel: "Warning", } laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) - Log("key[%s] value[%s]\n", k, v) + // Log("key[%s] value[%s]\n", k, v) } + EventHashUpdateMutex.UnLock() if len(laConfigErrorRecords) > 0 { configErrorEntry := ConfigErrorBlob{ @@ -433,9 +435,15 @@ func flushConfigErrorRecords() { Log("Successfully flushed %d records in %s", numRecords, elapsed) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + EventHashUpdateMutex.Lock() // for k := range PromScrapeErrorHash { // delete(PromScrapeErrorHash, k) // } + for k := range ConfigErrorHash { + delete(ConfigErrorHash, k) + } + EventHashUpdateMutex.Unlock() + } } } From be7529c6e34fc6be7f0853f8a07e59caded0f477 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 20 Sep 2019 16:00:58 -0700 Subject: [PATCH 064/117] changes --- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 326189ca8..3e9c3c19e 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -388,7 +388,7 @@ func flushConfigErrorRecords() { laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) // Log("key[%s] value[%s]\n", k, v) } - EventHashUpdateMutex.UnLock() + EventHashUpdateMutex.Unlock() if len(laConfigErrorRecords) > 0 { configErrorEntry := ConfigErrorBlob{ From cbf5fe01ae71a847df7a18c1b76aade96e0e463b Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 20 Sep 2019 19:36:26 -0700 Subject: [PATCH 065/117] changes --- source/code/go/src/plugins/oms.go | 83 ++++++++++++------------- source/code/go/src/plugins/telemetry.go | 4 +- 2 files changed, 42 insertions(+), 45 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 3e9c3c19e..06f2d6a86 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -90,9 +90,9 @@ var ( // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset // Config error hash - ConfigErrorHash map[string]ConfigErrorDetails + ConfigErrorEvent map[string]KubeMonAgentEventDetails // Prometheus scraping error hash - PromScrapeErrorHash map[string]ConfigErrorDetails + PromScrapeErrorEvent map[string]KubeMonAgentEventDetails // EventHashUpdateMutex read and write mutex access to the event hash EventHashUpdateMutex = &sync.Mutex{} ) @@ -152,7 +152,7 @@ type ContainerLogBlob struct { } // Config Error message to be sent to Log Analytics -type laConfigError struct { +type laKubeMonAgentEvents struct { CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated Computer string `json:"Computer"` // Category string `json:"Category"` @@ -165,30 +165,30 @@ type laConfigError struct { ConfigErrorLevel string `json:"ConfigErrorLevel"` } -type ConfigErrorDetails struct { +type KubeMonAgentEventDetails struct { ContainerId string PodName string Computer string ErrorTimeStamp string } -type ConfigErrorBlob struct { - DataType string `json:"DataType"` - IPName string `json:"IPName"` - DataItems []laConfigError `json:"DataItems"` +type KubeMonAgentEventBlob struct { + DataType string `json:"DataType"` + IPName string `json:"IPName"` + DataItems []laKubeMonAgentEvents `json:"DataItems"` } -// ErrorType to be used as enum -type ErrorType int +// KubeMonAgentEventType to be used as enum +type KubeMonAgentEventType int const ( - // ErrorType to be used as enum for ConfigError and ScrapingError - ConfigError ErrorType = iota - ScrapingError + // KubeMonAgentEventType to be used as enum for ConfigError and ScrapingError + ConfigErrorEvent KubeMonAgentEventType = iota + ScrapingErrorEvent ) // DataType for Config error -const ConfigErrorDataType = "CONFIG_ERROR_BLOB" +const KubeMonAgentEventDataType = "KUBE_MON_AGENT_EVENTS_BLOB" func createLogger() *log.Logger { var logfile *os.File @@ -311,7 +311,7 @@ func convert(in interface{}) (float64, bool) { } // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA -func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { +func populateErrorHash(record map[interface{}]interface{}, errType KubeMonAgentEventType) { var logRecordString = ToString(record["log"]) var errorTimeStamp = ToString(record["time"]) containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) @@ -319,20 +319,20 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { Log("Updating config event hash - Locking for update \n ") EventHashUpdateMutex.Lock() switch errType { - case ConfigError: - // Log("configErrorHash\n") + case ConfigErrorEvent: + // Log("ConfigErrorEvent\n") // Doing this since the error logger library is adding quotes around the string and a newline to the end because // we are converting string to json to log lines in different lines as one record logRecordString = strings.TrimSuffix(logRecordString, "\n") logRecordString = logRecordString[1 : len(logRecordString)-1] - ConfigErrorHash[logRecordString] = ConfigErrorDetails{ + ConfigErrorEvent[logRecordString] = KubeMonAgentEventDetails{ ContainerId: containerID, PodName: podName, Computer: Computer, ErrorTimeStamp: errorTimeStamp, } - case ScrapingError: + case ScrapingErrorEvent: // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { @@ -340,7 +340,7 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { // Trimming the newline character at the end since this is being added as the key splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { - PromScrapeErrorHash[splitString] = ConfigErrorDetails{ + PromScrapeErrorEvent[splitString] = KubeMonAgentEventDetails{ ContainerId: containerID, PodName: podName, Computer: Computer, @@ -354,15 +354,15 @@ func populateErrorHash(record map[interface{}]interface{}, errType ErrorType) { } // Function to get config error log records after iterating through the two hashes -func flushConfigErrorRecords() { +func flushKubeMonAgentEventRecords() { for ; true; <-KubeMonAgentConfigEventsSendTicker.C { Log("In flushConfigErrorRecords\n") - var laConfigErrorRecords []laConfigError + var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() EventHashUpdateMutex.Lock() - for k, v := range ConfigErrorHash { - laConfigErrorRecord := laConfigError{ + for k, v := range ConfigErrorEvent { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ ConfigErrorMessage: k, ContainerId: v.ContainerId, Computer: v.Computer, @@ -371,12 +371,12 @@ func flushConfigErrorRecords() { ConfigErrorTime: v.ErrorTimeStamp, ConfigErrorLevel: "Error", } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) } - for k, v := range PromScrapeErrorHash { - laConfigErrorRecord := laConfigError{ + for k, v := range PromScrapeErrorEvent { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ ConfigErrorMessage: k, ContainerId: v.ContainerId, Computer: v.Computer, @@ -385,18 +385,18 @@ func flushConfigErrorRecords() { ConfigErrorTime: v.ErrorTimeStamp, ConfigErrorLevel: "Warning", } - laConfigErrorRecords = append(laConfigErrorRecords, laConfigErrorRecord) + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) } EventHashUpdateMutex.Unlock() - if len(laConfigErrorRecords) > 0 { - configErrorEntry := ConfigErrorBlob{ - DataType: ConfigErrorDataType, + if len(laKubeMonAgentEventsRecords) > 0 { + kubeMonAgentEventEntry := KubeMonAgentEventBlob{ + DataType: KubeMonAgentEventDataType, IPName: IPName, - DataItems: laConfigErrorRecords} + DataItems: laKubeMonAgentEventsRecords} - marshalled, err := json.Marshal(configErrorEntry) + marshalled, err := json.Marshal(kubeMonAgentEventEntry) Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) if err != nil { @@ -418,7 +418,7 @@ func flushConfigErrorRecords() { if err != nil { message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) Log(message) - Log("Failed to flush %d records after %s", len(laConfigErrorRecords), elapsed) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) // return output.FLB_RETRY } @@ -431,16 +431,13 @@ func flushConfigErrorRecords() { } defer resp.Body.Close() - numRecords := len(laConfigErrorRecords) + numRecords := len(laKubeMonAgentEventsRecords) Log("Successfully flushed %d records in %s", numRecords, elapsed) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour EventHashUpdateMutex.Lock() - // for k := range PromScrapeErrorHash { - // delete(PromScrapeErrorHash, k) - // } - for k := range ConfigErrorHash { - delete(ConfigErrorHash, k) + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) } EventHashUpdateMutex.Unlock() @@ -784,8 +781,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorHash = make(map[string]ConfigErrorDetails) - PromScrapeErrorHash = make(map[string]ConfigErrorDetails) + ConfigErrorEvent = make(map[string]KubeMonAgentEventDetails) + PromScrapeErrorEvent = make(map[string]KubeMonAgentEventDetails) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { @@ -887,7 +884,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { go updateContainerImageNameMaps() // Flush config error records every hour - go flushConfigErrorRecords() + go flushKubeMonAgentEventRecords() } else { Log("Running in replicaset. Disabling container enrichment caching & updates \n") } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index dc025e648..279b4cfc1 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -211,9 +211,9 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve // If record contains config error or prometheus scraping errors send it to ****** table var logEntry = ToString(record["log"]) if strings.Contains(logEntry, "config::error") { - populateErrorHash(record, ConfigError) + populateErrorHash(record, ConfigErrorEvent) } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { - populateErrorHash(record, ScrapingError) + populateErrorHash(record, ScrapingErrorEvent) } } From 6a12b31aba188c3db127e4c56bd61f5e4eed2cad Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 12:36:59 -0700 Subject: [PATCH 066/117] changes --- source/code/go/src/plugins/oms.go | 76 ++++++++++++++++--------- source/code/go/src/plugins/telemetry.go | 4 +- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 06f2d6a86..cf8df6469 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -153,23 +153,28 @@ type ContainerLogBlob struct { // Config Error message to be sent to Log Analytics type laKubeMonAgentEvents struct { - CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated Computer string `json:"Computer"` - // Category string `json:"Category"` - // Level string `json:"Level"` - // Details string `json:"Details"` - ConfigErrorMessage string `json:"ConfigErrorMessage"` - ContainerId string `json:"ContainerId"` - PodName string `json:"PodName"` - ConfigErrorTime string `json:"ConfigErrorTime"` - ConfigErrorLevel string `json:"ConfigErrorLevel"` + CollectionTime string `json:"CollectionTime"` //mapped to TimeGenerated + Category string `json:"Category"` + Level string `json:"Level"` + ClusterId string `json:"ClusterId"` + ClusterName string `json:"ClusterName"` + Message string `json:"Message"` + Tags string `json:"Tags"` + // ConfigErrorMessage string `json:"ConfigErrorMessage"` + // ContainerId string `json:"ContainerId"` + // PodName string `json:"PodName"` + // ConfigErrorTime string `json:"ConfigErrorTime"` + // ConfigErrorLevel string `json:"ConfigErrorLevel"` } -type KubeMonAgentEventDetails struct { - ContainerId string - PodName string - Computer string - ErrorTimeStamp string +type KubeMonAgentEventTags struct { + PodName string + ContainerId string + // EventTime string + FirstOccurance string + LastOccurance string + Count int } type KubeMonAgentEventBlob struct { @@ -184,7 +189,7 @@ type KubeMonAgentEventType int const ( // KubeMonAgentEventType to be used as enum for ConfigError and ScrapingError ConfigErrorEvent KubeMonAgentEventType = iota - ScrapingErrorEvent + PromPromScrapingErrorEvent ) // DataType for Config error @@ -311,9 +316,9 @@ func convert(in interface{}) (float64, bool) { } // PostConfigErrorstoLA sends config/prometheus scraping error log lines to LA -func populateErrorHash(record map[interface{}]interface{}, errType KubeMonAgentEventType) { +func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType KubeMonAgentEventType) { var logRecordString = ToString(record["log"]) - var errorTimeStamp = ToString(record["time"]) + var eventTimeStamp = ToString(record["time"]) containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) Log("Updating config event hash - Locking for update \n ") @@ -325,14 +330,23 @@ func populateErrorHash(record map[interface{}]interface{}, errType KubeMonAgentE // we are converting string to json to log lines in different lines as one record logRecordString = strings.TrimSuffix(logRecordString, "\n") logRecordString = logRecordString[1 : len(logRecordString)-1] - ConfigErrorEvent[logRecordString] = KubeMonAgentEventDetails{ - ContainerId: containerID, - PodName: podName, - Computer: Computer, - ErrorTimeStamp: errorTimeStamp, + + var existingErrorEvent = ConfigErrorEvent[logRecordString] + if existingErrorEvent != nil { + existingErrorEvent.LastOccurance = eventTimeStamp + existingErrorEvent.Count = existingErrorEvent.Count + 1 + } else { + ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + // EventTime: eventTimeStamp, + FirstOccurance: eventTimeStamp, + LastOccurance: eventTimeStamp, + Count: 1 + } } - case ScrapingErrorEvent: + case PromPromScrapingErrorEvent: // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { @@ -340,13 +354,21 @@ func populateErrorHash(record map[interface{}]interface{}, errType KubeMonAgentE // Trimming the newline character at the end since this is being added as the key splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { - PromScrapeErrorEvent[splitString] = KubeMonAgentEventDetails{ - ContainerId: containerID, + existingErrorEvent := PromScrapeErrorEvent[splitString] + if existingErrorEvent != nil { + existingErrorEvent.LastOccurance = eventTimeStamp + existingErrorEvent.Count = existingErrorEvent.Count + 1 + } else { + PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ PodName: podName, - Computer: Computer, - ErrorTimeStamp: errorTimeStamp, + ContainerId: containerID, + // ErrorTimeStamp: errorTimeStamp, + FirstOccurance: eventTimeStamp, + LastOccurance: eventTimeStamp, + Count: 1 } } + } } } EventHashUpdateMutex.Unlock() diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 279b4cfc1..28d916308 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -211,9 +211,9 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve // If record contains config error or prometheus scraping errors send it to ****** table var logEntry = ToString(record["log"]) if strings.Contains(logEntry, "config::error") { - populateErrorHash(record, ConfigErrorEvent) + populateKubeMonAgentEventHash(record, ConfigErrorEvent) } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { - populateErrorHash(record, ScrapingErrorEvent) + populateKubeMonAgentEventHash(record, PromScrapingErrorEvent) } } From 7f656b8f7d248ddd1d9ce04db361f09fc98af878 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 12:42:19 -0700 Subject: [PATCH 067/117] changes --- source/code/go/src/plugins/oms.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index cf8df6469..a3ee153d8 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -90,9 +90,9 @@ var ( // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset // Config error hash - ConfigErrorEvent map[string]KubeMonAgentEventDetails + ConfigErrorEvent map[string]KubeMonAgentEventTags // Prometheus scraping error hash - PromScrapeErrorEvent map[string]KubeMonAgentEventDetails + PromScrapeErrorEvent map[string]KubeMonAgentEventTags // EventHashUpdateMutex read and write mutex access to the event hash EventHashUpdateMutex = &sync.Mutex{} ) @@ -342,7 +342,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // EventTime: eventTimeStamp, FirstOccurance: eventTimeStamp, LastOccurance: eventTimeStamp, - Count: 1 + Count: 1, } } @@ -355,20 +355,20 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { existingErrorEvent := PromScrapeErrorEvent[splitString] - if existingErrorEvent != nil { - existingErrorEvent.LastOccurance = eventTimeStamp - existingErrorEvent.Count = existingErrorEvent.Count + 1 - } else { - PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - // ErrorTimeStamp: errorTimeStamp, - FirstOccurance: eventTimeStamp, - LastOccurance: eventTimeStamp, - Count: 1 + if existingErrorEvent != nil { + existingErrorEvent.LastOccurance = eventTimeStamp + existingErrorEvent.Count = existingErrorEvent.Count + 1 + } else { + PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + // ErrorTimeStamp: errorTimeStamp, + FirstOccurance: eventTimeStamp, + LastOccurance: eventTimeStamp, + Count: 1, + } } } - } } } EventHashUpdateMutex.Unlock() From d8258d2f3e27931874b09ded84cb974b5872c8a6 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 12:48:46 -0700 Subject: [PATCH 068/117] changes --- source/code/go/src/plugins/oms.go | 8 ++++---- source/code/go/src/plugins/telemetry.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index a3ee153d8..94de4e0c2 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -188,8 +188,8 @@ type KubeMonAgentEventType int const ( // KubeMonAgentEventType to be used as enum for ConfigError and ScrapingError - ConfigErrorEvent KubeMonAgentEventType = iota - PromPromScrapingErrorEvent + ConfigError KubeMonAgentEventType = iota + PromScrapingError ) // DataType for Config error @@ -324,7 +324,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K Log("Updating config event hash - Locking for update \n ") EventHashUpdateMutex.Lock() switch errType { - case ConfigErrorEvent: + case ConfigError: // Log("ConfigErrorEvent\n") // Doing this since the error logger library is adding quotes around the string and a newline to the end because // we are converting string to json to log lines in different lines as one record @@ -346,7 +346,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K } } - case PromPromScrapingErrorEvent: + case PromScrapingError: // Splitting this based on the string 'E! [inputs.prometheus]: ' since the log entry has timestamp and we want to remove that before building the hash var scrapingSplitString = strings.Split(logRecordString, "E! [inputs.prometheus]: ") if scrapingSplitString != nil && len(scrapingSplitString) == 2 { diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 28d916308..dbd6406bb 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -211,9 +211,9 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve // If record contains config error or prometheus scraping errors send it to ****** table var logEntry = ToString(record["log"]) if strings.Contains(logEntry, "config::error") { - populateKubeMonAgentEventHash(record, ConfigErrorEvent) + populateKubeMonAgentEventHash(record, ConfigError) } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { - populateKubeMonAgentEventHash(record, PromScrapingErrorEvent) + populateKubeMonAgentEventHash(record, PromScrapingError) } } From 4ce2c878064c88f02c471d08d56bce3e77bcbc7c Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 14:01:09 -0700 Subject: [PATCH 069/117] changes --- source/code/go/src/plugins/oms.go | 41 ++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 94de4e0c2..f9246b866 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -384,28 +384,41 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { + tagJson, err := json.Marshal(v) + + // if err != nil { + // return nil, err + // } + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Error", + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: tagJson, } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) } for k, v := range PromScrapeErrorEvent { + tagJson, err := json.Marshal(v) + // if err != nil { + // return nil, err + // } + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - ConfigErrorMessage: k, - ContainerId: v.ContainerId, - Computer: v.Computer, - PodName: v.PodName, - CollectionTime: start.Format(time.RFC3339), - ConfigErrorTime: v.ErrorTimeStamp, - ConfigErrorLevel: "Warning", + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: tagJson, } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) From acf80aac3c2cf5c8dcafb8499b1d496cc4a1c69a Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 14:57:06 -0700 Subject: [PATCH 070/117] changes --- source/code/go/src/plugins/oms.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index f9246b866..94d9f489d 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -331,10 +331,14 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K logRecordString = strings.TrimSuffix(logRecordString, "\n") logRecordString = logRecordString[1 : len(logRecordString)-1] - var existingErrorEvent = ConfigErrorEvent[logRecordString] - if existingErrorEvent != nil { - existingErrorEvent.LastOccurance = eventTimeStamp - existingErrorEvent.Count = existingErrorEvent.Count + 1 + // var existingErrorEvent = ConfigErrorEvent[logRecordString] + if val, ok := ConfigErrorEvent[logRecordString]; ok { + // existingErrorEvent := ConfigErrorEvent[logRecordString] + ConfigErrorEvent[logRecordString].LastOccurance = eventTimeStamp + ConfigErrorEvent[logRecordString].Count = existingErrorEvent.Count + 1 + // if existingErrorEvent != nil { + // existingErrorEvent.LastOccurance = eventTimeStamp + // existingErrorEvent.Count = existingErrorEvent.Count + 1 } else { ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ PodName: podName, @@ -355,9 +359,13 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { existingErrorEvent := PromScrapeErrorEvent[splitString] - if existingErrorEvent != nil { - existingErrorEvent.LastOccurance = eventTimeStamp - existingErrorEvent.Count = existingErrorEvent.Count + 1 + // if existingErrorEvent != nil { + // existingErrorEvent.LastOccurance = eventTimeStamp + // existingErrorEvent.Count = existingErrorEvent.Count + 1 + if val, ok := PromScrapeErrorEvent[splitString]; ok { + // existingErrorEvent := PromScrapeErrorEvent[splitString] + PromScrapeErrorEvent[splitString].LastOccurance = eventTimeStamp + PromScrapeErrorEvent[splitString].Count = existingErrorEvent.Count + 1 } else { PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ PodName: podName, From 3e2e8a0a62207caa335c2a8f825e8688426803d1 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:04:04 -0700 Subject: [PATCH 071/117] changes --- source/code/go/src/plugins/oms.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 94d9f489d..954d04710 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -331,8 +331,8 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K logRecordString = strings.TrimSuffix(logRecordString, "\n") logRecordString = logRecordString[1 : len(logRecordString)-1] - // var existingErrorEvent = ConfigErrorEvent[logRecordString] - if val, ok := ConfigErrorEvent[logRecordString]; ok { + var existingErrorEvent = ConfigErrorEvent[logRecordString] + if val, ok := existingErrorEvent; ok { // existingErrorEvent := ConfigErrorEvent[logRecordString] ConfigErrorEvent[logRecordString].LastOccurance = eventTimeStamp ConfigErrorEvent[logRecordString].Count = existingErrorEvent.Count + 1 @@ -363,7 +363,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 if val, ok := PromScrapeErrorEvent[splitString]; ok { - // existingErrorEvent := PromScrapeErrorEvent[splitString] + var existingErrorEvent = PromScrapeErrorEvent[splitString] PromScrapeErrorEvent[splitString].LastOccurance = eventTimeStamp PromScrapeErrorEvent[splitString].Count = existingErrorEvent.Count + 1 } else { From 0a64baedbc360903b53516be833a10c2b6a08d9c Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:07:24 -0700 Subject: [PATCH 072/117] changes --- source/code/go/src/plugins/oms.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 954d04710..4b208f3ed 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -331,9 +331,9 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K logRecordString = strings.TrimSuffix(logRecordString, "\n") logRecordString = logRecordString[1 : len(logRecordString)-1] - var existingErrorEvent = ConfigErrorEvent[logRecordString] - if val, ok := existingErrorEvent; ok { - // existingErrorEvent := ConfigErrorEvent[logRecordString] + // var existingErrorEvent = ConfigErrorEvent[logRecordString] + if val, ok := ConfigErrorEvent[logRecordString]; ok { + existingErrorEvent := ConfigErrorEvent[logRecordString] ConfigErrorEvent[logRecordString].LastOccurance = eventTimeStamp ConfigErrorEvent[logRecordString].Count = existingErrorEvent.Count + 1 // if existingErrorEvent != nil { @@ -363,7 +363,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 if val, ok := PromScrapeErrorEvent[splitString]; ok { - var existingErrorEvent = PromScrapeErrorEvent[splitString] + existingErrorEvent := PromScrapeErrorEvent[splitString] PromScrapeErrorEvent[splitString].LastOccurance = eventTimeStamp PromScrapeErrorEvent[splitString].Count = existingErrorEvent.Count + 1 } else { @@ -824,8 +824,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorEvent = make(map[string]KubeMonAgentEventDetails) - PromScrapeErrorEvent = make(map[string]KubeMonAgentEventDetails) + ConfigErrorEvent = make(map[string]KubeMonAgentEventTags) + PromScrapeErrorEvent = make(map[string]KubeMonAgentEventTags) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From fb57e5517ac2807ec8eede87244f69975a2acb4c Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:15:27 -0700 Subject: [PATCH 073/117] changes --- source/code/go/src/plugins/oms.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 4b208f3ed..cf56f99a3 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -333,9 +333,9 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // var existingErrorEvent = ConfigErrorEvent[logRecordString] if val, ok := ConfigErrorEvent[logRecordString]; ok { - existingErrorEvent := ConfigErrorEvent[logRecordString] - ConfigErrorEvent[logRecordString].LastOccurance = eventTimeStamp - ConfigErrorEvent[logRecordString].Count = existingErrorEvent.Count + 1 + existingErrorEvent := &ConfigErrorEvent[logRecordString] + *ConfigErrorEvent[logRecordString].LastOccurance = eventTimeStamp + *ConfigErrorEvent[logRecordString].Count = existingErrorEvent.Count + 1 // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 @@ -358,14 +358,14 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // Trimming the newline character at the end since this is being added as the key splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { - existingErrorEvent := PromScrapeErrorEvent[splitString] + // existingErrorEvent := PromScrapeErrorEvent[splitString] // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 if val, ok := PromScrapeErrorEvent[splitString]; ok { - existingErrorEvent := PromScrapeErrorEvent[splitString] - PromScrapeErrorEvent[splitString].LastOccurance = eventTimeStamp - PromScrapeErrorEvent[splitString].Count = existingErrorEvent.Count + 1 + existingErrorEvent := &PromScrapeErrorEvent[splitString] + *PromScrapeErrorEvent[splitString].LastOccurance = eventTimeStamp + *PromScrapeErrorEvent[splitString].Count = existingErrorEvent.Count + 1 } else { PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ PodName: podName, From fa251852683dce562839f7505db4caf6b4fd5fb5 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:23:14 -0700 Subject: [PATCH 074/117] chnages --- source/code/go/src/plugins/oms.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index cf56f99a3..254456f81 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -332,10 +332,10 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K logRecordString = logRecordString[1 : len(logRecordString)-1] // var existingErrorEvent = ConfigErrorEvent[logRecordString] - if val, ok := ConfigErrorEvent[logRecordString]; ok { - existingErrorEvent := &ConfigErrorEvent[logRecordString] - *ConfigErrorEvent[logRecordString].LastOccurance = eventTimeStamp - *ConfigErrorEvent[logRecordString].Count = existingErrorEvent.Count + 1 + if val, ok := &ConfigErrorEvent[logRecordString]; ok { + // existingErrorEvent := &ConfigErrorEvent[logRecordString] + val.LastOccurance = eventTimeStamp + val.Count = ConfigErrorEvent[logRecordString].Count + 1 // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 @@ -362,10 +362,10 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 - if val, ok := PromScrapeErrorEvent[splitString]; ok { - existingErrorEvent := &PromScrapeErrorEvent[splitString] - *PromScrapeErrorEvent[splitString].LastOccurance = eventTimeStamp - *PromScrapeErrorEvent[splitString].Count = existingErrorEvent.Count + 1 + if val, ok := &PromScrapeErrorEvent[splitString]; ok { + // existingErrorEvent := &PromScrapeErrorEvent[splitString] + val.LastOccurance = eventTimeStamp + val.Count = PromScrapeErrorEvent[splitString].Count + 1 } else { PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ PodName: podName, From 87a84d1271fef4b53116ce006daf14e569f331cb Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:41:14 -0700 Subject: [PATCH 075/117] changes --- source/code/go/src/plugins/oms.go | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 254456f81..5eb04dc5c 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -90,9 +90,9 @@ var ( // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset // Config error hash - ConfigErrorEvent map[string]KubeMonAgentEventTags + ConfigErrorEvent map[string]*KubeMonAgentEventTags // Prometheus scraping error hash - PromScrapeErrorEvent map[string]KubeMonAgentEventTags + PromScrapeErrorEvent map[string]*KubeMonAgentEventTags // EventHashUpdateMutex read and write mutex access to the event hash EventHashUpdateMutex = &sync.Mutex{} ) @@ -332,15 +332,15 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K logRecordString = logRecordString[1 : len(logRecordString)-1] // var existingErrorEvent = ConfigErrorEvent[logRecordString] - if val, ok := &ConfigErrorEvent[logRecordString]; ok { + if val, ok := ConfigErrorEvent[logRecordString]; ok { // existingErrorEvent := &ConfigErrorEvent[logRecordString] - val.LastOccurance = eventTimeStamp - val.Count = ConfigErrorEvent[logRecordString].Count + 1 + *val.LastOccurance = eventTimeStamp + *val.Count = *val.Count + 1 // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 } else { - ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ + ConfigErrorEvent[logRecordString] = &KubeMonAgentEventTags{ PodName: podName, ContainerId: containerID, // EventTime: eventTimeStamp, @@ -362,12 +362,12 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 - if val, ok := &PromScrapeErrorEvent[splitString]; ok { + if val, ok := PromScrapeErrorEvent[splitString]; ok { // existingErrorEvent := &PromScrapeErrorEvent[splitString] - val.LastOccurance = eventTimeStamp - val.Count = PromScrapeErrorEvent[splitString].Count + 1 + *val.LastOccurance = eventTimeStamp + *val.Count = *val.Count + 1 } else { - PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ + PromScrapeErrorEvent[splitString] = &KubeMonAgentEventTags{ PodName: podName, ContainerId: containerID, // ErrorTimeStamp: errorTimeStamp, @@ -392,7 +392,7 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(v) + tagJson, err := json.Marshal(*v) // if err != nil { // return nil, err @@ -413,7 +413,7 @@ func flushKubeMonAgentEventRecords() { } for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(v) + tagJson, err := json.Marshal(*v) // if err != nil { // return nil, err // } @@ -824,8 +824,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorEvent = make(map[string]KubeMonAgentEventTags) - PromScrapeErrorEvent = make(map[string]KubeMonAgentEventTags) + ConfigErrorEvent = make(map[string]*KubeMonAgentEventTags) + PromScrapeErrorEvent = make(map[string]*KubeMonAgentEventTags) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 24d8e8617bde24c579803baa2f2e9c5acb900fcc Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:43:53 -0700 Subject: [PATCH 076/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 5eb04dc5c..0b8f5309d 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -334,8 +334,8 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // var existingErrorEvent = ConfigErrorEvent[logRecordString] if val, ok := ConfigErrorEvent[logRecordString]; ok { // existingErrorEvent := &ConfigErrorEvent[logRecordString] - *val.LastOccurance = eventTimeStamp - *val.Count = *val.Count + 1 + (*val).LastOccurance = eventTimeStamp + (*val).Count = ((*val).Count) + 1 // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 From 6e4f9ce833cd93c7d9f5fd0185d3257be7dea8ff Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:46:31 -0700 Subject: [PATCH 077/117] changes --- source/code/go/src/plugins/oms.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 0b8f5309d..d3710cc0f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -364,8 +364,8 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // existingErrorEvent.Count = existingErrorEvent.Count + 1 if val, ok := PromScrapeErrorEvent[splitString]; ok { // existingErrorEvent := &PromScrapeErrorEvent[splitString] - *val.LastOccurance = eventTimeStamp - *val.Count = *val.Count + 1 + *(val).LastOccurance = eventTimeStamp + *(val).Count = ((*val).Count) + 1 } else { PromScrapeErrorEvent[splitString] = &KubeMonAgentEventTags{ PodName: podName, @@ -392,7 +392,7 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(*v) + // tagJson, err := json.Marshal(*v) // if err != nil { // return nil, err @@ -406,14 +406,14 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - Tags: tagJson, + Tags: *v, } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) } for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(*v) + // tagJson, err := json.Marshal(*v) // if err != nil { // return nil, err // } @@ -426,7 +426,7 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - Tags: tagJson, + Tags: *v, } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) From a418ff2ead60b7394a385cb67fa14c86fdd3b0ab Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 15:53:28 -0700 Subject: [PATCH 078/117] changes --- source/code/go/src/plugins/oms.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d3710cc0f..e186cdf2f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -364,8 +364,8 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // existingErrorEvent.Count = existingErrorEvent.Count + 1 if val, ok := PromScrapeErrorEvent[splitString]; ok { // existingErrorEvent := &PromScrapeErrorEvent[splitString] - *(val).LastOccurance = eventTimeStamp - *(val).Count = ((*val).Count) + 1 + (*val).LastOccurance = eventTimeStamp + (*val).Count = ((*val).Count) + 1 } else { PromScrapeErrorEvent[splitString] = &KubeMonAgentEventTags{ PodName: podName, @@ -406,7 +406,7 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - Tags: *v, + Tags: ToString(*v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) @@ -426,7 +426,7 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - Tags: *v, + Tags: ToString(*v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) From 47b1515954793bf0911549099eee49ccb6890db9 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 16:20:41 -0700 Subject: [PATCH 079/117] changes --- source/code/go/src/plugins/oms.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index e186cdf2f..351301dd1 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -53,7 +53,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 300 +const kubeMonAgentConfigEventFlushInterval = 60 var ( // PluginConfiguration the plugins configuration @@ -409,7 +409,7 @@ func flushKubeMonAgentEventRecords() { Tags: ToString(*v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) + Log("key[%s] value[%s]\n", k, *v) } for k, v := range PromScrapeErrorEvent { @@ -429,7 +429,7 @@ func flushKubeMonAgentEventRecords() { Tags: ToString(*v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) + Log("key[%s] value[%s]\n", k, *v) } EventHashUpdateMutex.Unlock() From 254b1ddee441d8a2dc6bf8953aebd588ae25df39 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 16:38:41 -0700 Subject: [PATCH 080/117] changes --- source/code/go/src/plugins/oms.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 351301dd1..649a62505 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -392,7 +392,7 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { - // tagJson, err := json.Marshal(*v) + tagJson, err := json.Marshal(v) // if err != nil { // return nil, err @@ -406,14 +406,14 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - Tags: ToString(*v), + Tags: fmt.Sprintf("%s", tagJson), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) Log("key[%s] value[%s]\n", k, *v) } for k, v := range PromScrapeErrorEvent { - // tagJson, err := json.Marshal(*v) + tagJson, err := json.Marshal(v) // if err != nil { // return nil, err // } @@ -426,7 +426,7 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - Tags: ToString(*v), + Tags: fmt.Sprintf("%s", tagJson), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) Log("key[%s] value[%s]\n", k, *v) From bd1bf159e64947723efcffaf6f9e4e914f5f5af0 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 16:41:37 -0700 Subject: [PATCH 081/117] changes --- source/code/go/src/plugins/oms.go | 61 ++++++++++++++++--------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 649a62505..8a2a9ada7 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -394,42 +394,43 @@ func flushKubeMonAgentEventRecords() { for k, v := range ConfigErrorEvent { tagJson, err := json.Marshal(v) - // if err != nil { - // return nil, err - // } - - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), + if err != nil { + // return nil, err + + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + Log("key[%s] value[%s]\n", k, *v) } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - Log("key[%s] value[%s]\n", k, *v) } for k, v := range PromScrapeErrorEvent { tagJson, err := json.Marshal(v) - // if err != nil { - // return nil, err - // } - - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), + if err != nil { + // return nil, err + // } + + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + Log("key[%s] value[%s]\n", k, *v) } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - Log("key[%s] value[%s]\n", k, *v) } EventHashUpdateMutex.Unlock() From e4b3934cf3568dc98fdb3773d18f4d237f0d6ad6 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 16:52:41 -0700 Subject: [PATCH 082/117] changes --- source/code/go/src/plugins/oms.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 8a2a9ada7..7242fff52 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -396,7 +396,8 @@ func flushKubeMonAgentEventRecords() { if err != nil { // return nil, err - + Log(ToString(err)) + } else { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), @@ -417,7 +418,8 @@ func flushKubeMonAgentEventRecords() { if err != nil { // return nil, err // } - + Log(ToString(err)) + } else { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), From 76f7a63119ab0d037e41013031ff74c234543669 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 17:30:46 -0700 Subject: [PATCH 083/117] changes --- source/code/go/src/plugins/oms.go | 121 ++++++++++++++++++------------ 1 file changed, 75 insertions(+), 46 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 7242fff52..06592c0af 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -90,9 +90,9 @@ var ( // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset // Config error hash - ConfigErrorEvent map[string]*KubeMonAgentEventTags + ConfigErrorEvent map[string]KubeMonAgentEventTags // Prometheus scraping error hash - PromScrapeErrorEvent map[string]*KubeMonAgentEventTags + PromScrapeErrorEvent map[string]KubeMonAgentEventTags // EventHashUpdateMutex read and write mutex access to the event hash EventHashUpdateMutex = &sync.Mutex{} ) @@ -334,13 +334,25 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // var existingErrorEvent = ConfigErrorEvent[logRecordString] if val, ok := ConfigErrorEvent[logRecordString]; ok { // existingErrorEvent := &ConfigErrorEvent[logRecordString] - (*val).LastOccurance = eventTimeStamp - (*val).Count = ((*val).Count) + 1 + Log("In config error existing hash update\n") + eventCount := val.Count + eventFirstOccurance := val.FirstOccurance + + ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + // EventTime: eventTimeStamp, + FirstOccurance: eventFirstOccurance, + LastOccurance: eventTimeStamp, + Count: eventCount + 1, + } + // (*val).LastOccurance = eventTimeStamp + // (*val).Count = ((*val).Count) + 1 // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 } else { - ConfigErrorEvent[logRecordString] = &KubeMonAgentEventTags{ + ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ PodName: podName, ContainerId: containerID, // EventTime: eventTimeStamp, @@ -363,11 +375,24 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 if val, ok := PromScrapeErrorEvent[splitString]; ok { + Log("In config error existing hash update\n") + eventCount := val.Count + eventFirstOccurance := val.FirstOccurance + + PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ + PodName: podName, + ContainerId: containerID, + // EventTime: eventTimeStamp, + FirstOccurance: eventFirstOccurance, + LastOccurance: eventTimeStamp, + Count: eventCount + 1, + } + // existingErrorEvent := &PromScrapeErrorEvent[splitString] - (*val).LastOccurance = eventTimeStamp - (*val).Count = ((*val).Count) + 1 + // (*val).LastOccurance = eventTimeStamp + // (*val).Count = ((*val).Count) + 1 } else { - PromScrapeErrorEvent[splitString] = &KubeMonAgentEventTags{ + PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ PodName: podName, ContainerId: containerID, // ErrorTimeStamp: errorTimeStamp, @@ -392,47 +417,51 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(v) - - if err != nil { - // return nil, err - Log(ToString(err)) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), - } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - Log("key[%s] value[%s]\n", k, *v) + // tagJson, err := json.Marshal(v) + + // if err != nil { + // // return nil, err + // Log(ToString(err)) + // } else { + + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + // Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", v), } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + Log("key[%s] value[%s]\n", k, v) + // } } for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(v) - if err != nil { - // return nil, err - // } - Log(ToString(err)) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), - } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - Log("key[%s] value[%s]\n", k, *v) + // tagJson, err := json.Marshal(v) + // if err != nil { + // // return nil, err + // // } + // Log(ToString(err)) + // } else { + + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + // Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", v), } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + Log("key[%s] value[%s]\n", k, v) + // } } EventHashUpdateMutex.Unlock() @@ -827,8 +856,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { NameIDMap = make(map[string]string) // Keeping the two error hashes separate since we need to keep the config error hash for the lifetime of the container // whereas the prometheus scrape error hash needs to be refreshed every hour - ConfigErrorEvent = make(map[string]*KubeMonAgentEventTags) - PromScrapeErrorEvent = make(map[string]*KubeMonAgentEventTags) + ConfigErrorEvent = make(map[string]KubeMonAgentEventTags) + PromScrapeErrorEvent = make(map[string]KubeMonAgentEventTags) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 8768b6deb6dd802815972c80fd9ccc4f96924b20 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 17:58:17 -0700 Subject: [PATCH 084/117] changes --- source/code/go/src/plugins/oms.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 06592c0af..392a6f600 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -374,6 +374,8 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 + Log("Trying to get existing value: %s", PromScrapeErrorEvent[splitString]) + if val, ok := PromScrapeErrorEvent[splitString]; ok { Log("In config error existing hash update\n") eventCount := val.Count From ed901b13b12f8440c1f0fdfbad10328e92db34e7 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 18:17:37 -0700 Subject: [PATCH 085/117] changes --- source/code/go/src/plugins/oms.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 392a6f600..cf8c78e1d 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -374,7 +374,11 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 - Log("Trying to get existing value: %s", PromScrapeErrorEvent[splitString]) + Log("Trying to get existing value: \n") + for k, v := range PromScrapeErrorEvent { + Log("key[%s] value[%s]\n", k, v) + Log("splitString: %s", splitString) + } if val, ok := PromScrapeErrorEvent[splitString]; ok { Log("In config error existing hash update\n") From a3f05b0df41be1ca84cb369b0e0199786f917b15 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 18:36:09 -0700 Subject: [PATCH 086/117] changes --- source/code/go/src/plugins/oms.go | 38 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index cf8c78e1d..2b863f2ed 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -423,13 +423,12 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { - // tagJson, err := json.Marshal(v) + tagJson, err := json.Marshal(v) - // if err != nil { + if err != nil { // // return nil, err - // Log(ToString(err)) - // } else { - + Log(ToString(err)) + } else { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), @@ -439,21 +438,20 @@ func flushKubeMonAgentEventRecords() { ClusterName: ResourceName, Message: k, // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", v), + Tags: fmt.Sprintf("%s", tagJson), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - Log("key[%s] value[%s]\n", k, v) + // Log("key[%s] value[%s]\n", k, v) // } } for k, v := range PromScrapeErrorEvent { - // tagJson, err := json.Marshal(v) - // if err != nil { + tagJson, err := json.Marshal(v) + if err != nil { // // return nil, err // // } - // Log(ToString(err)) - // } else { - + Log(ToString(err)) + } else { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), @@ -462,11 +460,11 @@ func flushKubeMonAgentEventRecords() { ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", v), + Tags: fmt.Sprintf("%s", tagJson), + // Tags: fmt.Sprintf("%s", v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - Log("key[%s] value[%s]\n", k, v) + // Log("key[%s] value[%s]\n", k, v) // } } EventHashUpdateMutex.Unlock() @@ -516,11 +514,11 @@ func flushKubeMonAgentEventRecords() { Log("Successfully flushed %d records in %s", numRecords, elapsed) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - EventHashUpdateMutex.Lock() - for k := range PromScrapeErrorEvent { - delete(PromScrapeErrorEvent, k) - } - EventHashUpdateMutex.Unlock() + // EventHashUpdateMutex.Lock() + // for k := range PromScrapeErrorEvent { + // delete(PromScrapeErrorEvent, k) + // } + // EventHashUpdateMutex.Unlock() } } From 3c5c8d7a2bab96a39cabaabe453c4173a9900510 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 18:39:02 -0700 Subject: [PATCH 087/117] changes --- source/code/go/src/plugins/oms.go | 60 ++++++++++++++++--------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 2b863f2ed..c9d20c71a 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -426,46 +426,48 @@ func flushKubeMonAgentEventRecords() { tagJson, err := json.Marshal(v) if err != nil { - // // return nil, err + // // return nil, err Log(ToString(err)) } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", tagJson), + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + // Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } for k, v := range PromScrapeErrorEvent { tagJson, err := json.Marshal(v) if err != nil { - // // return nil, err - // // } + // // return nil, err + // // } Log(ToString(err)) } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), - // Tags: fmt.Sprintf("%s", v), + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + // Tags: fmt.Sprintf("%s", v), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } EventHashUpdateMutex.Unlock() From 284a7cdc5233d50ff0ec30ab607c9849c7771088 Mon Sep 17 00:00:00 2001 From: rashmy Date: Sun, 22 Sep 2019 18:48:13 -0700 Subject: [PATCH 088/117] changes --- source/code/go/src/plugins/oms.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c9d20c71a..fc4d396ff 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -516,11 +516,11 @@ func flushKubeMonAgentEventRecords() { Log("Successfully flushed %d records in %s", numRecords, elapsed) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - // EventHashUpdateMutex.Lock() - // for k := range PromScrapeErrorEvent { - // delete(PromScrapeErrorEvent, k) - // } - // EventHashUpdateMutex.Unlock() + EventHashUpdateMutex.Lock() + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) + } + EventHashUpdateMutex.Unlock() } } @@ -919,7 +919,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval)) Log("kubeMonAgentConfigEventFlushInterval = %d \n", kubeMonAgentConfigEventFlushInterval) - KubeMonAgentConfigEventsSendTicker = time.NewTicker(time.Second * time.Duration(kubeMonAgentConfigEventFlushInterval)) + KubeMonAgentConfigEventsSendTicker = time.NewTicker(time.Minute * time.Duration(kubeMonAgentConfigEventFlushInterval)) // Populate Computer field containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"]) From 4a2fe914a5cc571681f6a0b1ec57eaf07de97095 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 14:01:26 -0700 Subject: [PATCH 089/117] changes --- source/code/go/src/plugins/oms.go | 218 +++++++++++++----------- source/code/go/src/plugins/out_oms.go | 22 ++- source/code/go/src/plugins/telemetry.go | 35 ++-- 3 files changed, 160 insertions(+), 115 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index fc4d396ff..f5ba22e56 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -374,11 +374,11 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // if existingErrorEvent != nil { // existingErrorEvent.LastOccurance = eventTimeStamp // existingErrorEvent.Count = existingErrorEvent.Count + 1 - Log("Trying to get existing value: \n") - for k, v := range PromScrapeErrorEvent { - Log("key[%s] value[%s]\n", k, v) - Log("splitString: %s", splitString) - } + // Log("Trying to get existing value: \n") + // for k, v := range PromScrapeErrorEvent { + // Log("key[%s] value[%s]\n", k, v) + // Log("splitString: %s", splitString) + // } if val, ok := PromScrapeErrorEvent[splitString]; ok { Log("In config error existing hash update\n") @@ -414,116 +414,144 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K Log("Updating config event hash - Unlocked after update \n ") } +// Sends data to post config events and to App Insights Instance +// func PostConfigEventsAndSendAITraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { +// var logLines []string +// for _, record := range records { +// logLines = append(logLines, ToString(record["log"])) +// // If record contains config error or prometheus scraping errors send it to ****** table +// var logEntry = ToString(record["log"]) +// if strings.Contains(logEntry, "config::error") { +// populateKubeMonAgentEventHash(record, ConfigError) +// } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { +// populateKubeMonAgentEventHash(record, PromScrapingError) +// } +// } + +// // Pushing traces to App Insights after joining the record lines +// PushToAppInsightsTraces(logLines, severityLevel, tag) + +// // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { +// // Log("Flushing config error records\n") +// // flushConfigErrorRecords() +// // } +// // traceEntry := strings.Join(logLines, "\n") +// // traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) +// // traceTelemetryItem.Properties["tag"] = tag +// // TelemetryClient.Track(traceTelemetryItem) +// // return output.FLB_OK +// } + // Function to get config error log records after iterating through the two hashes func flushKubeMonAgentEventRecords() { - for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - Log("In flushConfigErrorRecords\n") - var laKubeMonAgentEventsRecords []laKubeMonAgentEvents - start := time.Now() + // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + Log("In flushConfigErrorRecords\n") + var laKubeMonAgentEventsRecords []laKubeMonAgentEvents + start := time.Now() - EventHashUpdateMutex.Lock() - for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(v) - - if err != nil { - // // return nil, err - Log(ToString(err)) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", tagJson), - } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } + EventHashUpdateMutex.Lock() + for k, v := range ConfigErrorEvent { + tagJson, err := json.Marshal(v) + + if err != nil { + // // return nil, err + Log(ToString(err)) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + // Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", tagJson), } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } + } - for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(v) - if err != nil { - // // return nil, err - // // } - Log(ToString(err)) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), - // Tags: fmt.Sprintf("%s", v), - } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } + for k, v := range PromScrapeErrorEvent { + tagJson, err := json.Marshal(v) + if err != nil { + // // return nil, err + // // } + Log(ToString(err)) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + // Tags: fmt.Sprintf("%s", v), } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } - EventHashUpdateMutex.Unlock() + } + EventHashUpdateMutex.Unlock() - if len(laKubeMonAgentEventsRecords) > 0 { - kubeMonAgentEventEntry := KubeMonAgentEventBlob{ - DataType: KubeMonAgentEventDataType, - IPName: IPName, - DataItems: laKubeMonAgentEventsRecords} + if len(laKubeMonAgentEventsRecords) > 0 { + kubeMonAgentEventEntry := KubeMonAgentEventBlob{ + DataType: KubeMonAgentEventDataType, + IPName: IPName, + DataItems: laKubeMonAgentEventsRecords} - marshalled, err := json.Marshal(kubeMonAgentEventEntry) + marshalled, err := json.Marshal(kubeMonAgentEventEntry) - Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - // return output.FLB_OK - } - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } + Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + // return output.FLB_OK + } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) - if err != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) - // return output.FLB_RETRY - } + // return output.FLB_RETRY + } - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) - } - // return output.FLB_RETRY + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) } + // return output.FLB_RETRY + } - defer resp.Body.Close() - numRecords := len(laKubeMonAgentEventsRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) - - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - EventHashUpdateMutex.Lock() - for k := range PromScrapeErrorEvent { - delete(PromScrapeErrorEvent, k) - } - EventHashUpdateMutex.Unlock() + defer resp.Body.Close() + numRecords := len(laKubeMonAgentEventsRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + EventHashUpdateMutex.Lock() + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) } + EventHashUpdateMutex.Unlock() + } + // } } //Translates telegraf time series to one or more Azure loganalytics metric(s) diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 654772f32..3228c414a 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -2,13 +2,12 @@ package main import ( "github.com/fluent/fluent-bit-go/output" - "github.com/Microsoft/ApplicationInsights-Go/appinsights" ) import ( "C" + "os" "strings" "unsafe" - "os" ) //export FLBPluginRegister @@ -61,8 +60,25 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { incomingTag := strings.ToLower(C.GoString(tag)) if strings.Contains(incomingTag, "oms.container.log.flbplugin") { + var logLines []string + for _, record := range records { + logLines = append(logLines, ToString(record["log"])) + // If record contains config error or prometheus scraping errors send it to ****** table + var logEntry = ToString(record["log"]) + if strings.Contains(logEntry, "config::error") { + populateKubeMonAgentEventHash(record, ConfigError) + } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { + populateKubeMonAgentEventHash(record, PromScrapingError) + } + } + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + return flushKubeMonAgentEventRecords() + } + // This will also include routing to send data to OMS workspace for config errors - return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) + return PushToAppInsightsTraces(logLines, severityLevel, tag) + // return PostConfigEventsAndSendAITraces(records, appinsights.Information, incomingTag) + // return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { return PostTelegrafMetricsToLA(records) } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index dbd6406bb..7dd61ad7b 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -204,23 +204,24 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { - var logLines []string - for _, record := range records { - logLines = append(logLines, ToString(record["log"])) - // If record contains config error or prometheus scraping errors send it to ****** table - var logEntry = ToString(record["log"]) - if strings.Contains(logEntry, "config::error") { - populateKubeMonAgentEventHash(record, ConfigError) - } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { - populateKubeMonAgentEventHash(record, PromScrapingError) - } - } - - // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - // Log("Flushing config error records\n") - // flushConfigErrorRecords() - // } +// func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { +// var logLines []string +// for _, record := range records { +// logLines = append(logLines, ToString(record["log"])) +// // If record contains config error or prometheus scraping errors send it to ****** table +// var logEntry = ToString(record["log"]) +// if strings.Contains(logEntry, "config::error") { +// populateKubeMonAgentEventHash(record, ConfigError) +// } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { +// populateKubeMonAgentEventHash(record, PromScrapingError) +// } +// } + +// for ; true; <-KubeMonAgentConfigEventsSendTicker.C { +// Log("Flushing config error records\n") +// flushConfigErrorRecords() +// } +func PushToAppInsightsTraces(logLines []string, severityLevel contracts.SeverityLevel, tag string) int { traceEntry := strings.Join(logLines, "\n") traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) traceTelemetryItem.Properties["tag"] = tag From a0f82187184ed871d6cf345282a2655cee9d486a Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 15:41:56 -0700 Subject: [PATCH 090/117] changes --- source/code/go/src/plugins/oms.go | 207 ++++++++++++++---------- source/code/go/src/plugins/out_oms.go | 21 +-- source/code/go/src/plugins/telemetry.go | 31 ++-- 3 files changed, 137 insertions(+), 122 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index f5ba22e56..b16bad4a9 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -444,114 +444,151 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // Function to get config error log records after iterating through the two hashes func flushKubeMonAgentEventRecords() { - // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - Log("In flushConfigErrorRecords\n") - var laKubeMonAgentEventsRecords []laKubeMonAgentEvents - start := time.Now() + for ; true; <-KubeMonAgentConfigEventsSendTicker.C { + Log("In flushConfigErrorRecords\n") + var laKubeMonAgentEventsRecords []laKubeMonAgentEvents + start := time.Now() - EventHashUpdateMutex.Lock() - for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(v) - if err != nil { - // // return nil, err - Log(ToString(err)) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", tagJson), +if ((len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0)) { + EventHashUpdateMutex.Lock() + for k, v := range ConfigErrorEvent { + tagJson, err := json.Marshal(v) + + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + // Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } - } - for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(v) - if err != nil { - // // return nil, err - // // } - Log(ToString(err)) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), - // Tags: fmt.Sprintf("%s", v), + for k, v := range PromScrapeErrorEvent { + tagJson, err := json.Marshal(v) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + // Tags: fmt.Sprintf("%s", v), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } + EventHashUpdateMutex.Unlock() } - EventHashUpdateMutex.Unlock() - - if len(laKubeMonAgentEventsRecords) > 0 { - kubeMonAgentEventEntry := KubeMonAgentEventBlob{ - DataType: KubeMonAgentEventDataType, - IPName: IPName, - DataItems: laKubeMonAgentEventsRecords} - - marshalled, err := json.Marshal(kubeMonAgentEventEntry) + else { + + tagsValue = KubeMonAgentEventTags{ + PodName: "-", + ContainerId: "-", + // EventTime: eventTimeStamp, + FirstOccurance: "-", + LastOccurance: "-", + Count: 0, + } - Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + tagJson, err := json.Marshal(tagsValue) if err != nil { message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) Log(message) SendException(message) - // return output.FLB_OK - } - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) + } else { + + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/noerror", + Level: "Info", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: "No errors in the past hour", + Tags: fmt.Sprintf("%s", tagJson), + // Tags: fmt.Sprintf("%s", v), } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + } +//TODO + } - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) + if len(laKubeMonAgentEventsRecords) > 0 { + kubeMonAgentEventEntry := KubeMonAgentEventBlob{ + DataType: KubeMonAgentEventDataType, + IPName: IPName, + DataItems: laKubeMonAgentEventsRecords} - if err != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + marshalled, err := json.Marshal(kubeMonAgentEventEntry) - // return output.FLB_RETRY - } + Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + // return output.FLB_OK + } + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + + // return output.FLB_RETRY } - // return output.FLB_RETRY - } - defer resp.Body.Close() - numRecords := len(laKubeMonAgentEventsRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + // return output.FLB_RETRY + } - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - EventHashUpdateMutex.Lock() - for k := range PromScrapeErrorEvent { - delete(PromScrapeErrorEvent, k) - } - EventHashUpdateMutex.Unlock() + defer resp.Body.Close() + numRecords := len(laKubeMonAgentEventsRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + EventHashUpdateMutex.Lock() + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) + } + EventHashUpdateMutex.Unlock() + + } } - // } } //Translates telegraf time series to one or more Azure loganalytics metric(s) diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 3228c414a..f45a96af4 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -60,25 +60,8 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { incomingTag := strings.ToLower(C.GoString(tag)) if strings.Contains(incomingTag, "oms.container.log.flbplugin") { - var logLines []string - for _, record := range records { - logLines = append(logLines, ToString(record["log"])) - // If record contains config error or prometheus scraping errors send it to ****** table - var logEntry = ToString(record["log"]) - if strings.Contains(logEntry, "config::error") { - populateKubeMonAgentEventHash(record, ConfigError) - } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { - populateKubeMonAgentEventHash(record, PromScrapingError) - } - } - for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - return flushKubeMonAgentEventRecords() - } - - // This will also include routing to send data to OMS workspace for config errors - return PushToAppInsightsTraces(logLines, severityLevel, tag) - // return PostConfigEventsAndSendAITraces(records, appinsights.Information, incomingTag) - // return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) + // This will also include populating cache to be sent as for config events + return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { return PostTelegrafMetricsToLA(records) } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 7dd61ad7b..5b745ae31 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -204,24 +204,19 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -// func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { -// var logLines []string -// for _, record := range records { -// logLines = append(logLines, ToString(record["log"])) -// // If record contains config error or prometheus scraping errors send it to ****** table -// var logEntry = ToString(record["log"]) -// if strings.Contains(logEntry, "config::error") { -// populateKubeMonAgentEventHash(record, ConfigError) -// } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { -// populateKubeMonAgentEventHash(record, PromScrapingError) -// } -// } - -// for ; true; <-KubeMonAgentConfigEventsSendTicker.C { -// Log("Flushing config error records\n") -// flushConfigErrorRecords() -// } -func PushToAppInsightsTraces(logLines []string, severityLevel contracts.SeverityLevel, tag string) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { + var logLines []string + for _, record := range records { + logLines = append(logLines, ToString(record["log"])) + // If record contains config error or prometheus scraping errors send it to ****** table + var logEntry = ToString(record["log"]) + if strings.Contains(logEntry, "config::error") { + populateKubeMonAgentEventHash(record, ConfigError) + } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { + populateKubeMonAgentEventHash(record, PromScrapingError) + } + } + traceEntry := strings.Join(logLines, "\n") traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) traceTelemetryItem.Properties["tag"] = tag From 8c275e03248e8f382fec692b9d69d826240c4367 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 15:44:09 -0700 Subject: [PATCH 091/117] changes --- source/code/go/src/plugins/oms.go | 127 +++++++++++++++--------------- 1 file changed, 62 insertions(+), 65 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index b16bad4a9..cef8b088d 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -449,92 +449,89 @@ func flushKubeMonAgentEventRecords() { var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() + if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { + EventHashUpdateMutex.Lock() + for k, v := range ConfigErrorEvent { + tagJson, err := json.Marshal(v) -if ((len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0)) { - EventHashUpdateMutex.Lock() - for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(v) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/configmap", + Level: "Error", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + // Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } + } + } - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", tagJson), + for k, v := range PromScrapeErrorEvent { + tagJson, err := json.Marshal(v) + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: "container.azm.ms/promscraping", + Level: "Warning", + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + // Tags: fmt.Sprintf("%s", v), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + // Log("key[%s] value[%s]\n", k, v) + // } } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } - } + EventHashUpdateMutex.Unlock() + } else { + tagsValue = KubeMonAgentEventTags{ + PodName: "-", + ContainerId: "-", + // EventTime: eventTimeStamp, + FirstOccurance: "-", + LastOccurance: "-", + Count: 0, + } - for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(v) + tagJson, err := json.Marshal(tagsValue) if err != nil { message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) Log(message) SendException(message) } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", + Category: "container.azm.ms/noerror", + Level: "Info", ClusterId: ResourceID, ClusterName: ResourceName, - Message: k, + Message: "No errors in the past hour", Tags: fmt.Sprintf("%s", tagJson), // Tags: fmt.Sprintf("%s", v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } + //TODO } - EventHashUpdateMutex.Unlock() - } - else { - - tagsValue = KubeMonAgentEventTags{ - PodName: "-", - ContainerId: "-", - // EventTime: eventTimeStamp, - FirstOccurance: "-", - LastOccurance: "-", - Count: 0, - } - - tagJson, err := json.Marshal(tagsValue) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - } else { - - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/noerror", - Level: "Info", - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: "No errors in the past hour", - Tags: fmt.Sprintf("%s", tagJson), - // Tags: fmt.Sprintf("%s", v), - } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - } -//TODO - } if len(laKubeMonAgentEventsRecords) > 0 { kubeMonAgentEventEntry := KubeMonAgentEventBlob{ From 74c8b4363f647b63c97560bfc180f953a4a51e59 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 15:47:32 -0700 Subject: [PATCH 092/117] changes --- source/code/go/src/plugins/oms.go | 2 +- source/code/go/src/plugins/out_oms.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index cef8b088d..c7a2ef696 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -501,7 +501,7 @@ func flushKubeMonAgentEventRecords() { } EventHashUpdateMutex.Unlock() } else { - tagsValue = KubeMonAgentEventTags{ + tagsValue := KubeMonAgentEventTags{ PodName: "-", ContainerId: "-", // EventTime: eventTimeStamp, diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index f45a96af4..1f1915798 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -1,6 +1,7 @@ package main import ( + "github.com/Microsoft/ApplicationInsights-Go/appinsights" "github.com/fluent/fluent-bit-go/output" ) import ( From 411225fd3c5fdcc86c843699e7d0e585af697d53 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:07:15 -0700 Subject: [PATCH 093/117] changes --- source/code/go/src/plugins/oms.go | 84 ++++++++++++++++++------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c7a2ef696..6215f9430 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -442,10 +442,17 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // // return output.FLB_OK // } +func checkResponseCodeAndRetry() { + +} + // Function to get config error log records after iterating through the two hashes func flushKubeMonAgentEventRecords() { for ; true; <-KubeMonAgentConfigEventsSendTicker.C { Log("In flushConfigErrorRecords\n") + var retries = 2 + var flushSuccessful = false + var resp, err, elapsed var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() @@ -501,6 +508,8 @@ func flushKubeMonAgentEventRecords() { } EventHashUpdateMutex.Unlock() } else { + + //Sending a record in case there are no errors to be able to differentiate between no data vs no errors tagsValue := KubeMonAgentEventTags{ PodName: "-", ContainerId: "-", @@ -524,13 +533,12 @@ func flushKubeMonAgentEventRecords() { Level: "Info", ClusterId: ResourceID, ClusterName: ResourceName, - Message: "No errors in the past hour", + Message: "No errors", Tags: fmt.Sprintf("%s", tagJson), // Tags: fmt.Sprintf("%s", v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) } - //TODO } if len(laKubeMonAgentEventsRecords) > 0 { @@ -547,43 +555,51 @@ func flushKubeMonAgentEventRecords() { Log(message) SendException(message) // return output.FLB_OK - } - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } - - resp, err := HTTPClient.Do(req) - elapsed := time.Since(start) - - if err != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) - - // return output.FLB_RETRY - } + } else { + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + // Retry in case of failure + for retries > 0 { + resp, err = HTTPClient.Do(req) + elapsed = time.Since(start) + + if err != nil { + message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + retries -= 1 + } else { + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + retries -= 1 + } + // return output.FLB_RETRY + } else { + flushSuccessful = true + break + } + } } - // return output.FLB_RETRY - } - defer resp.Body.Close() - numRecords := len(laKubeMonAgentEventsRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) + defer resp.Body.Close() + if flushSuccessful { + numRecords := len(laKubeMonAgentEventsRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - EventHashUpdateMutex.Lock() - for k := range PromScrapeErrorEvent { - delete(PromScrapeErrorEvent, k) + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + EventHashUpdateMutex.Lock() + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) + } + EventHashUpdateMutex.Unlock() + } } - EventHashUpdateMutex.Unlock() - } } } From 33e105975cdcd2daa6a788e34ec7041f4068e0f5 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:08:17 -0700 Subject: [PATCH 094/117] changes --- source/code/go/src/plugins/oms.go | 1 - 1 file changed, 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 6215f9430..664c88ca3 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -579,7 +579,6 @@ func flushKubeMonAgentEventRecords() { Log("Status %s Status Code %d", resp.Status, resp.StatusCode) retries -= 1 } - // return output.FLB_RETRY } else { flushSuccessful = true break From a9330761808aa3880b3d06e37297e5fc2ca66827 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:10:06 -0700 Subject: [PATCH 095/117] changes --- source/code/go/src/plugins/oms.go | 32 ------------------------------- 1 file changed, 32 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 664c88ca3..4a5f39fe8 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -414,38 +414,6 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K Log("Updating config event hash - Unlocked after update \n ") } -// Sends data to post config events and to App Insights Instance -// func PostConfigEventsAndSendAITraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { -// var logLines []string -// for _, record := range records { -// logLines = append(logLines, ToString(record["log"])) -// // If record contains config error or prometheus scraping errors send it to ****** table -// var logEntry = ToString(record["log"]) -// if strings.Contains(logEntry, "config::error") { -// populateKubeMonAgentEventHash(record, ConfigError) -// } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { -// populateKubeMonAgentEventHash(record, PromScrapingError) -// } -// } - -// // Pushing traces to App Insights after joining the record lines -// PushToAppInsightsTraces(logLines, severityLevel, tag) - -// // for ; true; <-KubeMonAgentConfigEventsSendTicker.C { -// // Log("Flushing config error records\n") -// // flushConfigErrorRecords() -// // } -// // traceEntry := strings.Join(logLines, "\n") -// // traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) -// // traceTelemetryItem.Properties["tag"] = tag -// // TelemetryClient.Track(traceTelemetryItem) -// // return output.FLB_OK -// } - -func checkResponseCodeAndRetry() { - -} - // Function to get config error log records after iterating through the two hashes func flushKubeMonAgentEventRecords() { for ; true; <-KubeMonAgentConfigEventsSendTicker.C { From bafe229e32e59d0a7ff53986f04aece322076aec Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:18:06 -0700 Subject: [PATCH 096/117] changes --- source/code/go/src/plugins/oms.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 4a5f39fe8..fac42c2e4 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -420,7 +420,9 @@ func flushKubeMonAgentEventRecords() { Log("In flushConfigErrorRecords\n") var retries = 2 var flushSuccessful = false - var resp, err, elapsed + var resp *Response + var err error + var elapsed Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() From a94c70bbb9871de3bfe940645d9f4cfd89c10c5b Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:21:34 -0700 Subject: [PATCH 097/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index fac42c2e4..7392a3261 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -420,9 +420,9 @@ func flushKubeMonAgentEventRecords() { Log("In flushConfigErrorRecords\n") var retries = 2 var flushSuccessful = false - var resp *Response + var resp *http.Response var err error - var elapsed Duration + var elapsed int64 var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() From f9e056867a8f8dd477d75ce151549e21daff15f2 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:23:47 -0700 Subject: [PATCH 098/117] changes --- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 7392a3261..9a5dd8334 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -422,7 +422,7 @@ func flushKubeMonAgentEventRecords() { var flushSuccessful = false var resp *http.Response var err error - var elapsed int64 + var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() From 3333c164d318f0c115ee462772c76e2d87f7ad14 Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 17:25:49 -0700 Subject: [PATCH 099/117] changes --- source/code/go/src/plugins/oms.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 9a5dd8334..22d42c622 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -421,7 +421,7 @@ func flushKubeMonAgentEventRecords() { var retries = 2 var flushSuccessful = false var resp *http.Response - var err error + var postError error var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents start := time.Now() @@ -535,10 +535,10 @@ func flushKubeMonAgentEventRecords() { // Retry in case of failure for retries > 0 { - resp, err = HTTPClient.Do(req) + resp, postError = HTTPClient.Do(req) elapsed = time.Since(start) - if err != nil { + if postError != nil { message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) Log(message) Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) From 4f2c2834d6bbaffb067864630b621f61abb77ace Mon Sep 17 00:00:00 2001 From: rashmy Date: Mon, 23 Sep 2019 21:10:30 -0700 Subject: [PATCH 100/117] changes --- source/code/go/src/plugins/oms.go | 89 ++++++++++--------------- source/code/go/src/plugins/telemetry.go | 2 +- 2 files changed, 37 insertions(+), 54 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 22d42c622..a0a7effe1 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -28,6 +28,9 @@ const ContainerLogDataType = "CONTAINER_LOG_BLOB" // DataType for Insights metric const InsightsMetricsDataType = "INSIGHTS_METRICS_BLOB" +// DataType for KubeMonAgentEvent +const KubeMonAgentEventDataType = "KUBE_MON_AGENT_EVENTS_BLOB" + //env varibale which has ResourceId for LA const ResourceIdEnv = "AKS_RESOURCE_ID" @@ -46,6 +49,18 @@ const TelegrafTagClusterName = "clusterName" // clusterId tag const TelegrafTagClusterID = "clusterId" +const ConfigErrorEventCategory = "container.azm.ms/configmap" + +const PromScrapingErrorEventCategory = "container.azm.ms/promscraping" + +const NoErrorEventCategory = "container.azm.ms/noerror" + +const KubeMonAgentEventError = "Error" + +const KubeMonAgentEventWarning = "Warning" + +const KubeMonAgentEventInfo = "Info" + // ContainerLogPluginConfFilePath --> config file path for container log plugin const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" @@ -161,17 +176,11 @@ type laKubeMonAgentEvents struct { ClusterName string `json:"ClusterName"` Message string `json:"Message"` Tags string `json:"Tags"` - // ConfigErrorMessage string `json:"ConfigErrorMessage"` - // ContainerId string `json:"ContainerId"` - // PodName string `json:"PodName"` - // ConfigErrorTime string `json:"ConfigErrorTime"` - // ConfigErrorLevel string `json:"ConfigErrorLevel"` } type KubeMonAgentEventTags struct { - PodName string - ContainerId string - // EventTime string + PodName string + ContainerId string FirstOccurance string LastOccurance string Count int @@ -192,9 +201,6 @@ const ( PromScrapingError ) -// DataType for Config error -const KubeMonAgentEventDataType = "KUBE_MON_AGENT_EVENTS_BLOB" - func createLogger() *log.Logger { var logfile *os.File path := "/var/opt/microsoft/docker-cimprov/log/fluent-bit-out-oms-runtime.log" @@ -325,7 +331,6 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K EventHashUpdateMutex.Lock() switch errType { case ConfigError: - // Log("ConfigErrorEvent\n") // Doing this since the error logger library is adding quotes around the string and a newline to the end because // we are converting string to json to log lines in different lines as one record logRecordString = strings.TrimSuffix(logRecordString, "\n") @@ -339,23 +344,16 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K eventFirstOccurance := val.FirstOccurance ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - // EventTime: eventTimeStamp, + PodName: podName, + ContainerId: containerID, FirstOccurance: eventFirstOccurance, LastOccurance: eventTimeStamp, Count: eventCount + 1, } - // (*val).LastOccurance = eventTimeStamp - // (*val).Count = ((*val).Count) + 1 - // if existingErrorEvent != nil { - // existingErrorEvent.LastOccurance = eventTimeStamp - // existingErrorEvent.Count = existingErrorEvent.Count + 1 } else { ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - // EventTime: eventTimeStamp, + PodName: podName, + ContainerId: containerID, FirstOccurance: eventTimeStamp, LastOccurance: eventTimeStamp, Count: 1, @@ -370,38 +368,22 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // Trimming the newline character at the end since this is being added as the key splitString = strings.TrimSuffix(splitString, "\n") if splitString != "" { - // existingErrorEvent := PromScrapeErrorEvent[splitString] - // if existingErrorEvent != nil { - // existingErrorEvent.LastOccurance = eventTimeStamp - // existingErrorEvent.Count = existingErrorEvent.Count + 1 - // Log("Trying to get existing value: \n") - // for k, v := range PromScrapeErrorEvent { - // Log("key[%s] value[%s]\n", k, v) - // Log("splitString: %s", splitString) - // } - if val, ok := PromScrapeErrorEvent[splitString]; ok { Log("In config error existing hash update\n") eventCount := val.Count eventFirstOccurance := val.FirstOccurance PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - // EventTime: eventTimeStamp, + PodName: podName, + ContainerId: containerID, FirstOccurance: eventFirstOccurance, LastOccurance: eventTimeStamp, Count: eventCount + 1, } - - // existingErrorEvent := &PromScrapeErrorEvent[splitString] - // (*val).LastOccurance = eventTimeStamp - // (*val).Count = ((*val).Count) + 1 } else { PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - // ErrorTimeStamp: errorTimeStamp, + PodName: podName, + ContainerId: containerID, FirstOccurance: eventTimeStamp, LastOccurance: eventTimeStamp, Count: 1, @@ -439,13 +421,12 @@ func flushKubeMonAgentEventRecords() { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/configmap", - Level: "Error", + Category: ConfigErrorEventCategory, + Level: KubeMonAgentEventError, ClusterId: ResourceID, ClusterName: ResourceName, Message: k, - // Tags: fmt.Sprintf("%s", tagJson), - Tags: fmt.Sprintf("%s", tagJson), + Tags: fmt.Sprintf("%s", tagJson), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) // Log("key[%s] value[%s]\n", k, v) @@ -463,8 +444,8 @@ func flushKubeMonAgentEventRecords() { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/promscraping", - Level: "Warning", + Category: PromScrapingErrorEventCategory, + Level: KubeMonAgentEventWarning, ClusterId: ResourceID, ClusterName: ResourceName, Message: k, @@ -499,13 +480,12 @@ func flushKubeMonAgentEventRecords() { laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), - Category: "container.azm.ms/noerror", - Level: "Info", + Category: NoErrorEventCategory, + Level: KubeMonAgentEventInfo, ClusterId: ResourceID, ClusterName: ResourceName, Message: "No errors", Tags: fmt.Sprintf("%s", tagJson), - // Tags: fmt.Sprintf("%s", v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) } @@ -556,7 +536,9 @@ func flushKubeMonAgentEventRecords() { } } - defer resp.Body.Close() + if resp != nil && resp.Body != nil { + defer resp.Body.Close() + } if flushSuccessful { numRecords := len(laKubeMonAgentEventsRecords) Log("Successfully flushed %d records in %s", numRecords, elapsed) @@ -566,6 +548,7 @@ func flushKubeMonAgentEventRecords() { for k := range PromScrapeErrorEvent { delete(PromScrapeErrorEvent, k) } + Log("PromScrapeErrorEvent cache cleared\n") EventHashUpdateMutex.Unlock() } } diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 5b745ae31..6e5157488 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -208,7 +208,7 @@ func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLeve var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) - // If record contains config error or prometheus scraping errors send it to ****** table + // If record contains config error or prometheus scraping errors send it to KubeMonAgentEvents table var logEntry = ToString(record["log"]) if strings.Contains(logEntry, "config::error") { populateKubeMonAgentEventHash(record, ConfigError) From 9cef47011e13c26072e88dbb72409af0c0c709f4 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 25 Sep 2019 10:28:22 -0700 Subject: [PATCH 101/117] changes --- source/code/go/src/plugins/oms.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index a0a7effe1..0370de08a 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -68,7 +68,8 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 60 +// const kubeMonAgentConfigEventFlushInterval = 60 +const kubeMonAgentConfigEventFlushInterval = 1 var ( // PluginConfiguration the plugins configuration From ec022d4d01d5ae63bdc53c9a3f289e8d485ed3b9 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 25 Sep 2019 11:37:46 -0700 Subject: [PATCH 102/117] changes --- source/code/go/src/plugins/oms.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 0370de08a..3deffc311 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -68,6 +68,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp // IPName for Container Log const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 + // const kubeMonAgentConfigEventFlushInterval = 60 const kubeMonAgentConfigEventFlushInterval = 1 @@ -516,7 +517,9 @@ func flushKubeMonAgentEventRecords() { // Retry in case of failure for retries > 0 { + Log("In retry block with retry count: %d", retries) resp, postError = HTTPClient.Do(req) + Log("Post response status %s status code %d", resp.Status, resp.StatusCode) elapsed = time.Since(start) if postError != nil { @@ -531,6 +534,7 @@ func flushKubeMonAgentEventRecords() { retries -= 1 } } else { + Log("Setting flush successful to true\n") flushSuccessful = true break } @@ -538,9 +542,10 @@ func flushKubeMonAgentEventRecords() { } if resp != nil && resp.Body != nil { + Log("Closing response body") defer resp.Body.Close() } - if flushSuccessful { + if flushSuccessful == true { numRecords := len(laKubeMonAgentEventsRecords) Log("Successfully flushed %d records in %s", numRecords, elapsed) From b206ecf700bd0822f588321f20b9c3c252861df7 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 25 Sep 2019 11:52:32 -0700 Subject: [PATCH 103/117] changes --- source/code/go/src/plugins/oms.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 3deffc311..700217700 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -661,6 +661,8 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int return output.FLB_OK } + Log("telegrafmetrics-marshalled:\n" + ToString(jsonBytes)) + //Post metrics data to LA req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) @@ -802,6 +804,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { SendException(message) return output.FLB_OK } + Log("containerlog-marshalled:\n" + ToString(marshalled)) + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") //expensive to do string len for every request, so use a flag From 417045e918b33ddbefcd8d33fc07d9e4408b1602 Mon Sep 17 00:00:00 2001 From: rashmy Date: Wed, 25 Sep 2019 21:21:14 -0700 Subject: [PATCH 104/117] changes --- source/code/go/src/plugins/oms.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 700217700..960b1a1c8 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -69,8 +69,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -// const kubeMonAgentConfigEventFlushInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 1 +const kubeMonAgentConfigEventFlushInterval = 60 var ( // PluginConfiguration the plugins configuration @@ -661,8 +660,6 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int return output.FLB_OK } - Log("telegrafmetrics-marshalled:\n" + ToString(jsonBytes)) - //Post metrics data to LA req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(jsonBytes)) @@ -804,7 +801,6 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { SendException(message) return output.FLB_OK } - Log("containerlog-marshalled:\n" + ToString(marshalled)) req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) req.Header.Set("Content-Type", "application/json") From b9478c5395ff5c6ba5779747a790d244bc4b2704 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 26 Sep 2019 16:53:01 -0700 Subject: [PATCH 105/117] changes --- source/code/go/src/plugins/oms.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 960b1a1c8..3dc47e481 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -61,6 +61,8 @@ const KubeMonAgentEventWarning = "Warning" const KubeMonAgentEventInfo = "Info" +const KubeMonAgentEventsFlushedEvent = "KubeMonAgentEventsFlushed" + // ContainerLogPluginConfFilePath --> config file path for container log plugin const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" @@ -407,8 +409,12 @@ func flushKubeMonAgentEventRecords() { var postError error var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents + telemetryDimensions := make(map[string]string) start := time.Now() + telemetryDimensions["ConfigErrorEventCount"] = ToString(len(ConfigErrorEvent)) + telemetryDimensions["PromScrapeErrorEventCount"] = ToString(len(PromScrapeErrorEvent)) + if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { EventHashUpdateMutex.Lock() for k, v := range ConfigErrorEvent { @@ -547,6 +553,7 @@ func flushKubeMonAgentEventRecords() { if flushSuccessful == true { numRecords := len(laKubeMonAgentEventsRecords) Log("Successfully flushed %d records in %s", numRecords, elapsed) + SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour EventHashUpdateMutex.Lock() From 30e9773bc0d0a60319256d50ff2346233e609977 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 26 Sep 2019 17:15:46 -0700 Subject: [PATCH 106/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 3dc47e481..83b4ec95f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -412,8 +412,8 @@ func flushKubeMonAgentEventRecords() { telemetryDimensions := make(map[string]string) start := time.Now() - telemetryDimensions["ConfigErrorEventCount"] = ToString(len(ConfigErrorEvent)) - telemetryDimensions["PromScrapeErrorEventCount"] = ToString(len(PromScrapeErrorEvent)) + telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent) + telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent) if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { EventHashUpdateMutex.Lock() From b4b32e9378517eb2c21aa1603b3ba9cdba56958d Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 26 Sep 2019 17:19:49 -0700 Subject: [PATCH 107/117] changes --- source/code/go/src/plugins/oms.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 83b4ec95f..73282e282 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -412,8 +412,8 @@ func flushKubeMonAgentEventRecords() { telemetryDimensions := make(map[string]string) start := time.Now() - telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent) - telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent) + telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent)) + telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent)) if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { EventHashUpdateMutex.Lock() From 134a25fdc5ffedbea791f1b429d414cf12a59586 Mon Sep 17 00:00:00 2001 From: rashmy Date: Thu, 26 Sep 2019 17:50:33 -0700 Subject: [PATCH 108/117] changes --- source/code/go/src/plugins/oms.go | 265 +++++++++++++++--------------- 1 file changed, 132 insertions(+), 133 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 73282e282..e81d08548 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -90,6 +90,8 @@ var ( ResourceCentric bool //ResourceName ResourceName string + //KubeMonAgentEvents skip first flush + skipKubeMonEventsFlush bool ) var ( @@ -402,168 +404,163 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K // Function to get config error log records after iterating through the two hashes func flushKubeMonAgentEventRecords() { for ; true; <-KubeMonAgentConfigEventsSendTicker.C { - Log("In flushConfigErrorRecords\n") - var retries = 2 - var flushSuccessful = false - var resp *http.Response - var postError error - var elapsed time.Duration - var laKubeMonAgentEventsRecords []laKubeMonAgentEvents - telemetryDimensions := make(map[string]string) - start := time.Now() - - telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent)) - telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent)) - - if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { - EventHashUpdateMutex.Lock() - for k, v := range ConfigErrorEvent { - tagJson, err := json.Marshal(v) + if skipKubeMonEventsFlush != true { + Log("In flushConfigErrorRecords\n") + var retries = 2 + var flushSuccessful = false + var resp *http.Response + var postError error + var elapsed time.Duration + var laKubeMonAgentEventsRecords []laKubeMonAgentEvents + telemetryDimensions := make(map[string]string) + start := time.Now() + + telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent)) + telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent)) + + if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { + EventHashUpdateMutex.Lock() + for k, v := range ConfigErrorEvent { + tagJson, err := json.Marshal(v) + + if err != nil { + message := fmt.Sprintf("Error while Marshalling config error event tags: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: ConfigErrorEventCategory, + Level: KubeMonAgentEventError, + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) + } + } - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: ConfigErrorEventCategory, - Level: KubeMonAgentEventError, - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: k, - Tags: fmt.Sprintf("%s", tagJson), + for k, v := range PromScrapeErrorEvent { + tagJson, err := json.Marshal(v) + if err != nil { + message := fmt.Sprintf("Error while Marshalling prom scrape error event tags: %s", err.Error()) + Log(message) + SendException(message) + } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ + Computer: Computer, + CollectionTime: start.Format(time.RFC3339), + Category: PromScrapingErrorEventCategory, + Level: KubeMonAgentEventWarning, + ClusterId: ResourceID, + ClusterName: ResourceName, + Message: k, + Tags: fmt.Sprintf("%s", tagJson), + } + laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } - } + EventHashUpdateMutex.Unlock() + } else { + //Sending a record in case there are no errors to be able to differentiate between no data vs no errors + tagsValue := KubeMonAgentEventTags{ + PodName: "-", + ContainerId: "-", + FirstOccurance: "-", + LastOccurance: "-", + Count: 0, + } - for k, v := range PromScrapeErrorEvent { - tagJson, err := json.Marshal(v) + tagJson, err := json.Marshal(tagsValue) if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) + message := fmt.Sprintf("Error while Marshalling no error tags: %s", err.Error()) Log(message) SendException(message) } else { + laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), - Category: PromScrapingErrorEventCategory, - Level: KubeMonAgentEventWarning, + Category: NoErrorEventCategory, + Level: KubeMonAgentEventInfo, ClusterId: ResourceID, ClusterName: ResourceName, - Message: k, + Message: "No errors", Tags: fmt.Sprintf("%s", tagJson), - // Tags: fmt.Sprintf("%s", v), } laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - // Log("key[%s] value[%s]\n", k, v) - // } } } - EventHashUpdateMutex.Unlock() - } else { - //Sending a record in case there are no errors to be able to differentiate between no data vs no errors - tagsValue := KubeMonAgentEventTags{ - PodName: "-", - ContainerId: "-", - // EventTime: eventTimeStamp, - FirstOccurance: "-", - LastOccurance: "-", - Count: 0, - } + if len(laKubeMonAgentEventsRecords) > 0 { + kubeMonAgentEventEntry := KubeMonAgentEventBlob{ + DataType: KubeMonAgentEventDataType, + IPName: IPName, + DataItems: laKubeMonAgentEventsRecords} - tagJson, err := json.Marshal(tagsValue) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - } else { - - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ - Computer: Computer, - CollectionTime: start.Format(time.RFC3339), - Category: NoErrorEventCategory, - Level: KubeMonAgentEventInfo, - ClusterId: ResourceID, - ClusterName: ResourceName, - Message: "No errors", - Tags: fmt.Sprintf("%s", tagJson), - } - laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) - } - } - - if len(laKubeMonAgentEventsRecords) > 0 { - kubeMonAgentEventEntry := KubeMonAgentEventBlob{ - DataType: KubeMonAgentEventDataType, - IPName: IPName, - DataItems: laKubeMonAgentEventsRecords} + marshalled, err := json.Marshal(kubeMonAgentEventEntry) - marshalled, err := json.Marshal(kubeMonAgentEventEntry) - - Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) - if err != nil { - message := fmt.Sprintf("Error while Marshalling config error entry: %s", err.Error()) - Log(message) - SendException(message) - // return output.FLB_OK - } else { - req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) - req.Header.Set("Content-Type", "application/json") - //expensive to do string len for every request, so use a flag - if ResourceCentric == true { - req.Header.Set("x-ms-AzureResourceId", ResourceID) - } - - // Retry in case of failure - for retries > 0 { - Log("In retry block with retry count: %d", retries) - resp, postError = HTTPClient.Do(req) - Log("Post response status %s status code %d", resp.Status, resp.StatusCode) - elapsed = time.Since(start) + // Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) + if err != nil { + message := fmt.Sprintf("Error while marshalling kubemonagentevent entry: %s", err.Error()) + Log(message) + SendException(message) + } else { + req, _ := http.NewRequest("POST", OMSEndpoint, bytes.NewBuffer(marshalled)) + req.Header.Set("Content-Type", "application/json") + //expensive to do string len for every request, so use a flag + if ResourceCentric == true { + req.Header.Set("x-ms-AzureResourceId", ResourceID) + } - if postError != nil { - message := fmt.Sprintf("Error when sending config error request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) - retries -= 1 - } else { - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) - retries -= 1 - } + // Retry in case of failure + for retries > 0 { + Log("In retry block with retry count: %d", retries) + resp, postError = HTTPClient.Do(req) + Log("Post response status %s status code %d", resp.Status, resp.StatusCode) + elapsed = time.Since(start) + + if postError != nil { + message := fmt.Sprintf("Error when sending kubemonagentevent request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + retries -= 1 } else { - Log("Setting flush successful to true\n") - flushSuccessful = true - break + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + retries -= 1 + } + } else { + flushSuccessful = true + break + } } } - } - if resp != nil && resp.Body != nil { - Log("Closing response body") - defer resp.Body.Close() - } - if flushSuccessful == true { - numRecords := len(laKubeMonAgentEventsRecords) - Log("Successfully flushed %d records in %s", numRecords, elapsed) - SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) - - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - EventHashUpdateMutex.Lock() - for k := range PromScrapeErrorEvent { - delete(PromScrapeErrorEvent, k) + if resp != nil && resp.Body != nil { + defer resp.Body.Close() + } + if flushSuccessful == true { + numRecords := len(laKubeMonAgentEventsRecords) + Log("Successfully flushed %d records in %s", numRecords, elapsed) + SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) + + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + EventHashUpdateMutex.Lock() + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) + } + Log("PromScrapeErrorEvent cache cleared\n") + EventHashUpdateMutex.Unlock() } - Log("PromScrapeErrorEvent cache cleared\n") - EventHashUpdateMutex.Unlock() } } + } else { + // Setting this to false to allow for subsequent flushes after the first hour + skipKubeMonEventsFlush = false } } } @@ -907,6 +904,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { // whereas the prometheus scrape error hash needs to be refreshed every hour ConfigErrorEvent = make(map[string]KubeMonAgentEventTags) PromScrapeErrorEvent = make(map[string]KubeMonAgentEventTags) + // Initilizing this to true to skip the first kubemonagentevent flush since the errors are not populated at this time + skipKubeMonEventsFlush = true pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { From 16211a3e45ddea77948d55f68612ddf7a7bc9939 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 10:21:32 -0700 Subject: [PATCH 109/117] changes --- source/code/go/src/plugins/oms.go | 3 --- source/code/go/src/plugins/telemetry.go | 9 --------- 2 files changed, 12 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index e81d08548..527971fa7 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -341,9 +341,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K logRecordString = strings.TrimSuffix(logRecordString, "\n") logRecordString = logRecordString[1 : len(logRecordString)-1] - // var existingErrorEvent = ConfigErrorEvent[logRecordString] if val, ok := ConfigErrorEvent[logRecordString]; ok { - // existingErrorEvent := &ConfigErrorEvent[logRecordString] Log("In config error existing hash update\n") eventCount := val.Count eventFirstOccurance := val.FirstOccurance @@ -502,7 +500,6 @@ func flushKubeMonAgentEventRecords() { marshalled, err := json.Marshal(kubeMonAgentEventEntry) - // Log("configerrorlogdata-marshalled:\n" + ToString(marshalled)) if err != nil { message := fmt.Sprintf("Error while marshalling kubemonagentevent entry: %s", err.Error()) Log(message) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 6e5157488..bef664e5b 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -56,15 +56,6 @@ const ( eventNameDaemonSetHeartbeat = "ContainerLogDaemonSetHeartbeatEvent" ) -// ErrorType to be used as enum -// type ErrorType int - -// const ( -// // ErrorType to be used as enum for ConfigError and ScrapingError -// ConfigError ErrorType = iota -// ScrapingError -// ) - // SendContainerLogPluginMetrics is a go-routine that flushes the data periodically (every 5 mins to App Insights) func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { telemetryPushInterval, err := strconv.Atoi(telemetryPushIntervalProperty) From 297be54b9c699b4f9773d6c273e75f98a325d013 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 10:26:00 -0700 Subject: [PATCH 110/117] changes --- source/code/go/src/plugins/telemetry.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index bef664e5b..d5675187f 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -198,13 +198,14 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { - logLines = append(logLines, ToString(record["log"])) // If record contains config error or prometheus scraping errors send it to KubeMonAgentEvents table var logEntry = ToString(record["log"]) if strings.Contains(logEntry, "config::error") { populateKubeMonAgentEventHash(record, ConfigError) } else if strings.Contains(logEntry, "E! [inputs.prometheus]") { populateKubeMonAgentEventHash(record, PromScrapingError) + } else { + logLines = append(logLines, logEntry) } } From fbd45ac21a2f053bc7dc75e2cbfa234b92dfd94f Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 10:26:38 -0700 Subject: [PATCH 111/117] changes --- source/code/go/src/plugins/oms.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 527971fa7..11b839c5f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -463,13 +463,7 @@ func flushKubeMonAgentEventRecords() { EventHashUpdateMutex.Unlock() } else { //Sending a record in case there are no errors to be able to differentiate between no data vs no errors - tagsValue := KubeMonAgentEventTags{ - PodName: "-", - ContainerId: "-", - FirstOccurance: "-", - LastOccurance: "-", - Count: 0, - } + tagsValue := KubeMonAgentEventTags{} tagJson, err := json.Marshal(tagsValue) if err != nil { @@ -477,7 +471,6 @@ func flushKubeMonAgentEventRecords() { Log(message) SendException(message) } else { - laKubeMonAgentEventsRecord := laKubeMonAgentEvents{ Computer: Computer, CollectionTime: start.Format(time.RFC3339), From 4519efa18f26de1ee9a84583b338b917de8ef192 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 10:40:36 -0700 Subject: [PATCH 112/117] changes --- source/code/go/src/plugins/oms.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 11b839c5f..67cf6d404 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -71,7 +71,8 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 60 +// const kubeMonAgentConfigEventFlushInterval = 60 +const kubeMonAgentConfigEventFlushInterval = 3 var ( // PluginConfiguration the plugins configuration From 2c1ee93b399dd812013146bd3b87fe077826f379 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 11:13:55 -0700 Subject: [PATCH 113/117] changes --- installer/scripts/tomlparser-prom-customconfig.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index d365c87e5..f2a1206f2 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -237,7 +237,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Successfully created telemetry file for daemonset" end else - puts "config::Typecheck failed for prometheus config settings for daemonset, using defaults" + ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for daemonset, using defaults") end # end of type check condition rescue => errorStr ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults") From e2f3a18363de3a7e61e86e5f424cf0328698e8f4 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 13:40:08 -0700 Subject: [PATCH 114/117] changes --- source/code/go/src/plugins/oms.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 67cf6d404..11b839c5f 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -71,8 +71,7 @@ const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimp const IPName = "Containers" const defaultContainerInventoryRefreshInterval = 60 -// const kubeMonAgentConfigEventFlushInterval = 60 -const kubeMonAgentConfigEventFlushInterval = 3 +const kubeMonAgentConfigEventFlushInterval = 60 var ( // PluginConfiguration the plugins configuration From 628d912e62dd0b336337592ec8b34fd223f8369d Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 17:20:49 -0700 Subject: [PATCH 115/117] changes --- installer/scripts/td-agent-bit-conf-customizer.rb | 2 +- installer/scripts/tomlparser-prom-customconfig.rb | 6 +++--- installer/scripts/tomlparser.rb | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/installer/scripts/td-agent-bit-conf-customizer.rb b/installer/scripts/td-agent-bit-conf-customizer.rb index 8fd60f968..fae3acb36 100644 --- a/installer/scripts/td-agent-bit-conf-customizer.rb +++ b/installer/scripts/td-agent-bit-conf-customizer.rb @@ -41,7 +41,7 @@ def substituteFluentBitPlaceHolders File.open(@td_agent_bit_conf_path, "w") { |file| file.puts new_contents } puts "config::Successfully substituted the placeholders in td-agent-bit.conf file" rescue => errorStr - ConfigParseErrorLogger.logError("td-agent-bit-config-customizer: error while substituting values: #{errorStr}") + ConfigParseErrorLogger.logError("td-agent-bit-config-customizer: error while substituting values in td-agent-bit.conf file: #{errorStr}") end end diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index f2a1206f2..62dc15a6f 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -41,7 +41,7 @@ def parseConfigMap return nil end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing toml config file for prometheus config: #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while parsing config map for prometheus config: #{errorStr}, using defaults") return nil end end @@ -67,7 +67,7 @@ def replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", ("monitor_kubernetes_pods = #{monitorKubernetesPods}")) new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", "") rescue => errorStr - ConfigParseErrorLogger.logError("Exception while replacing default pod monitor settings: #{errorStr}") + puts "Exception while replacing default pod monitor settings: #{errorStr}" end return new_contents end @@ -99,7 +99,7 @@ def createPrometheusPluginsWithNamespaceSetting(monitorKubernetesPods, monitorKu new_contents = new_contents.gsub("$AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER", pluginConfigsWithNamespaces) return new_contents rescue => errorStr - ConfigParseErrorLogger.logError("Exception while creating prometheus input plugins to filter namespaces: #{errorStr}, using defaults") + puts "Exception while creating prometheus input plugins to filter namespaces: #{errorStr}, using defaults" replaceDefaultMonitorPodSettings(new_contents, monitorKubernetesPods) end end diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 593cea743..1f755a38d 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -34,7 +34,7 @@ def parseConfigMap(path) return nil end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing toml config file: #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while parsing config map for log collection/env variable settings: #{errorStr}, using defaults") @excludePath = "*_kube-system_*.log" return nil end @@ -71,7 +71,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config settings for stdout log collection - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for stdout log collection - #{errorStr}, using defaults") end #Get stderr log config settings @@ -108,7 +108,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config settings for stderr log collection - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for stderr log collection - #{errorStr}, using defaults") end #Get environment variables log config settings @@ -118,7 +118,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for cluster level environment variable collection" end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config settings for cluster level environment variable collection - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults") end end @@ -130,7 +130,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end puts "enable_health_model = #{@enable_health_model}" rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for health_model enabled setting - #{errorStr}, using defaults") @enable_health_model = false end end @@ -154,7 +154,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) populateSettingValuesFromConfigMap(configMapSettings) end else - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults") @excludePath = "*_kube-system_*.log" end @@ -187,6 +187,6 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' " puts "****************End Config Processing********************" else - ConfigParseErrorLogger.logError("Exception while opening file for writing config environment variables") + puts "Exception while opening file for writing config environment variables" puts "****************End Config Processing********************" end From 1e37e01a56e9b123a29b5c33eb0659a06ed24679 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 17:38:20 -0700 Subject: [PATCH 116/117] changes --- .../scripts/tomlparser-prom-customconfig.rb | 10 +-- installer/scripts/tomlparser.rb | 12 +-- source/code/go/src/plugins/oms.go | 74 ++++++++----------- 3 files changed, 42 insertions(+), 54 deletions(-) diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index 62dc15a6f..ab868f1a9 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -41,7 +41,7 @@ def parseConfigMap return nil end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing config map for prometheus config: #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while parsing config map for prometheus config: #{errorStr}, using defaults, please check config map for errors") return nil end end @@ -182,7 +182,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Successfully created telemetry file for replicaset" end else - ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for replicaset, using defaults") + ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for replicaset, using defaults, please use right types for all settings") end # end of type check condition rescue => errorStr ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults") @@ -237,10 +237,10 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Successfully created telemetry file for daemonset" end else - ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for daemonset, using defaults") + ConfigParseErrorLogger.logError("Typecheck failed for prometheus config settings for daemonset, using defaults, please use right types for all settings") end # end of type check condition rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults, please check correctness of configmap") puts "****************End Prometheus Config Processing********************" end end # end of controller type check @@ -259,7 +259,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end else if (File.file?(@promConfigMapMountPath)) - puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults" + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported version") else puts "config::No configmap mounted for prometheus custom config, using defaults" end diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb index 1f755a38d..523f8c307 100644 --- a/installer/scripts/tomlparser.rb +++ b/installer/scripts/tomlparser.rb @@ -34,7 +34,7 @@ def parseConfigMap(path) return nil end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while parsing config map for log collection/env variable settings: #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while parsing config map for log collection/env variable settings: #{errorStr}, using defaults, please check config map for errors") @excludePath = "*_kube-system_*.log" return nil end @@ -71,7 +71,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config map settings for stdout log collection - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for stdout log collection - #{errorStr}, using defaults, please check config map for errors") end #Get stderr log config settings @@ -108,7 +108,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config map settings for stderr log collection - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for stderr log collection - #{errorStr}, using defaults, please check config map for errors") end #Get environment variables log config settings @@ -118,7 +118,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) puts "config::Using config map setting for cluster level environment variable collection" end rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for cluster level environment variable collection - #{errorStr}, using defaults, please check config map for errors") end end @@ -130,7 +130,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) end puts "enable_health_model = #{@enable_health_model}" rescue => errorStr - ConfigParseErrorLogger.logError("Exception while reading config map settings for health_model enabled setting - #{errorStr}, using defaults") + ConfigParseErrorLogger.logError("Exception while reading config map settings for health_model enabled setting - #{errorStr}, using defaults, please check config map for errors") @enable_health_model = false end end @@ -154,7 +154,7 @@ def populateSettingValuesFromConfigMap(parsedConfig) populateSettingValuesFromConfigMap(configMapSettings) end else - ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults") + ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") @excludePath = "*_kube-system_*.log" end diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 11b839c5f..078b57eb5 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -332,7 +332,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K var eventTimeStamp = ToString(record["time"]) containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"])) - Log("Updating config event hash - Locking for update \n ") + Log("Locked EventHashUpdateMutex for updating hash \n ") EventHashUpdateMutex.Lock() switch errType { case ConfigError: @@ -396,7 +396,7 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K } } EventHashUpdateMutex.Unlock() - Log("Updating config event hash - Unlocked after update \n ") + Log("Unlocked EventHashUpdateMutex after updating hash \n ") } // Function to get config error log records after iterating through the two hashes @@ -404,20 +404,18 @@ func flushKubeMonAgentEventRecords() { for ; true; <-KubeMonAgentConfigEventsSendTicker.C { if skipKubeMonEventsFlush != true { Log("In flushConfigErrorRecords\n") - var retries = 2 - var flushSuccessful = false - var resp *http.Response - var postError error - var elapsed time.Duration + // var resp *http.Response + // var postError error + // var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents telemetryDimensions := make(map[string]string) - start := time.Now() telemetryDimensions["ConfigErrorEventCount"] = strconv.Itoa(len(ConfigErrorEvent)) telemetryDimensions["PromScrapeErrorEventCount"] = strconv.Itoa(len(PromScrapeErrorEvent)) if (len(ConfigErrorEvent) > 0) || (len(PromScrapeErrorEvent) > 0) { EventHashUpdateMutex.Lock() + Log("Locked EventHashUpdateMutex for reading hashes\n") for k, v := range ConfigErrorEvent { tagJson, err := json.Marshal(v) @@ -460,7 +458,14 @@ func flushKubeMonAgentEventRecords() { laKubeMonAgentEventsRecords = append(laKubeMonAgentEventsRecords, laKubeMonAgentEventsRecord) } } + + //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour + for k := range PromScrapeErrorEvent { + delete(PromScrapeErrorEvent, k) + } + Log("PromScrapeErrorEvent cache cleared\n") EventHashUpdateMutex.Unlock() + Log("Unlocked EventHashUpdateMutex for reading hashes\n") } else { //Sending a record in case there are no errors to be able to differentiate between no data vs no errors tagsValue := KubeMonAgentEventTags{} @@ -505,46 +510,29 @@ func flushKubeMonAgentEventRecords() { req.Header.Set("x-ms-AzureResourceId", ResourceID) } - // Retry in case of failure - for retries > 0 { - Log("In retry block with retry count: %d", retries) - resp, postError = HTTPClient.Do(req) - Log("Post response status %s status code %d", resp.Status, resp.StatusCode) - elapsed = time.Since(start) - - if postError != nil { - message := fmt.Sprintf("Error when sending kubemonagentevent request %s \n", err.Error()) - Log(message) - Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) - retries -= 1 - } else { - if resp == nil || resp.StatusCode != 200 { - if resp != nil { - Log("Status %s Status Code %d", resp.Status, resp.StatusCode) - retries -= 1 - } - } else { - flushSuccessful = true - break - } - } - } + start := time.Now() + resp, postError = HTTPClient.Do(req) + elapsed = time.Since(start) - if resp != nil && resp.Body != nil { - defer resp.Body.Close() - } - if flushSuccessful == true { + if postError != nil { + message := fmt.Sprintf("Error when sending kubemonagentevent request %s \n", err.Error()) + Log(message) + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + } else if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("Status %s Status Code %d", resp.Status, resp.StatusCode) + } + Log("Failed to flush %d records after %s", len(laKubeMonAgentEventsRecords), elapsed) + } else { numRecords := len(laKubeMonAgentEventsRecords) Log("Successfully flushed %d records in %s", numRecords, elapsed) + + // Send telemetry to AppInsights resource SendEvent(KubeMonAgentEventsFlushedEvent, telemetryDimensions) - //Clearing out the prometheus scrape hash so that it can be rebuilt with the errors in the next hour - EventHashUpdateMutex.Lock() - for k := range PromScrapeErrorEvent { - delete(PromScrapeErrorEvent, k) - } - Log("PromScrapeErrorEvent cache cleared\n") - EventHashUpdateMutex.Unlock() + } + if resp != nil && resp.Body != nil { + defer resp.Body.Close() } } } From 711d1c7561c6c824023938570c4c9dd7f806c5c0 Mon Sep 17 00:00:00 2001 From: rashmy Date: Fri, 27 Sep 2019 17:41:07 -0700 Subject: [PATCH 117/117] changes --- source/code/go/src/plugins/oms.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 078b57eb5..6d78455bd 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -404,9 +404,10 @@ func flushKubeMonAgentEventRecords() { for ; true; <-KubeMonAgentConfigEventsSendTicker.C { if skipKubeMonEventsFlush != true { Log("In flushConfigErrorRecords\n") - // var resp *http.Response - // var postError error - // var elapsed time.Duration + start := time.Now() + var resp *http.Response + var postError error + var elapsed time.Duration var laKubeMonAgentEventsRecords []laKubeMonAgentEvents telemetryDimensions := make(map[string]string) @@ -510,7 +511,6 @@ func flushKubeMonAgentEventRecords() { req.Header.Set("x-ms-AzureResourceId", ResourceID) } - start := time.Now() resp, postError = HTTPClient.Do(req) elapsed = time.Since(start)