From d41718bae765e67e1ae2fd7103eb6736cf901d98 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sat, 22 Jan 2022 21:32:05 -0800 Subject: [PATCH 1/9] mdsd proc cpu and memory telemetry --- build/linux/installer/conf/telegraf.conf | 52 ++++++++++++++++-------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index 0e4824e70..c3b675604 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -158,6 +158,26 @@ namepass = ["container.azm.ms/disk"] #fieldpass = ["used_percent"] +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + # enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. + ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["agent_telemetry"] + #tagdrop = ["nodeName"] + ############################################################################### # PROCESSOR PLUGINS # ############################################################################### @@ -407,7 +427,7 @@ # Dummy plugin to test out toml parsing happens properly [[inputs.file]] - interval = "24h" + interval = "24h" files = ["test.json"] data_format = "json" @@ -550,21 +570,21 @@ #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] # taginclude = ["nodeName"] -#[[inputs.procstat]] -# #name_prefix="t.azm.ms/" -# exe = "mdsd" -# interval = "10s" -# pid_finder = "native" -# pid_tag = true -# name_override = "agent_telemetry" -# fieldpass = ["cpu_usage", "memory_rss", "memory_swap", "memory_vms", "memory_stack"] -# [inputs.procstat.tags] -# Computer = "$NODE_NAME" -# AgentVersion = "$AGENT_VERSION" -# ControllerType = "$CONTROLLER_TYPE" -# AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" -# ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" -# Region = "$TELEMETRY_AKS_REGION" +[[inputs.procstat]] + #name_prefix="t.azm.ms/" + exe = "mdsd" + interval = "60s" + pid_finder = "native" + pid_tag = true + name_override = "agent_telemetry" + fieldpass = ["cpu_usage", "memory_rss"] + [inputs.procstat.tags] + Computer = "$NODE_NAME" + AgentVersion = "$AGENT_VERSION" + ControllerType = "$CONTROLLER_TYPE" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" # [[inputs.procstat]] # #name_prefix="container.azm.ms/" # exe = "ruby" From 0eec16be6e4dababded18a87b15a669e24884aa9 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 25 Jan 2022 11:30:23 -0800 Subject: [PATCH 2/9] write ai logs to file and telemetry for mdsd proc --- .../conf/telegraf-prom-side-car.conf | 39 +++++++++++++++- build/linux/installer/conf/telegraf-rs.conf | 45 ++++++++++++++++--- build/linux/installer/conf/telegraf.conf | 2 +- .../installer/datafiles/base_container.data | 4 +- .../templates/omsagent-daemonset.yaml | 3 ++ .../templates/omsagent-deployment.yaml | 6 +++ kubernetes/omsagent.yaml | 10 ++++- .../ruby/ApplicationInsightsUtility.rb | 14 ++++-- .../channel/asynchronous_sender.rb | 5 ++- .../channel/sender_base.rb | 31 +++++++------ 10 files changed, 131 insertions(+), 28 deletions(-) diff --git a/build/linux/installer/conf/telegraf-prom-side-car.conf b/build/linux/installer/conf/telegraf-prom-side-car.conf index 1b6bab9f9..25d2cd23b 100644 --- a/build/linux/installer/conf/telegraf-prom-side-car.conf +++ b/build/linux/installer/conf/telegraf-prom-side-car.conf @@ -111,6 +111,26 @@ data_format = "json" namedrop = ["agent_telemetry", "file"] +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. + ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["agent_telemetry"] + #tagdrop = ["nodeName"] + ############################################################################### # PROCESSOR PLUGINS # ############################################################################### @@ -121,7 +141,7 @@ # Dummy plugin to test out toml parsing happens properly [[inputs.file]] - interval = "24h" + interval = "24h" files = ["test.json"] data_format = "json" @@ -166,3 +186,20 @@ $AZMON_TELEGRAF_CUSTOM_PROM_PLUGINS_WITH_NAMESPACE_FILTER ## OSM Prometheus configuration $AZMON_TELEGRAF_OSM_PROM_PLUGINS + +[[inputs.procstat]] + #name_prefix="t.azm.ms/" + exe = "mdsd" + interval = "60s" + pid_finder = "native" + pid_tag = true + name_override = "agent_telemetry" + fieldpass = ["cpu_usage", "memory_rss"] + [inputs.procstat.tags] + Computer = "$NODE_NAME" + AgentVersion = "$AGENT_VERSION" + ControllerType = "$CONTROLLER_TYPE" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" + ContainerType = "$CONTAINER_TYPE" \ No newline at end of file diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index 5de35d82c..c7154a78e 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -124,6 +124,26 @@ namedrop = ["agent_telemetry", "file"] #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. + ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["agent_telemetry"] + #tagdrop = ["nodeName"] + ############################################################################### # PROCESSOR PLUGINS # ############################################################################### @@ -369,7 +389,7 @@ # report_active = true # fieldpass = ["usage_active","cluster","node","host","device"] # taginclude = ["cluster","cpu","node"] - + # Read metrics about disk usage by mount point @@ -377,7 +397,7 @@ ## By default stats will be gathered for all mount points. ## Set mount_points will restrict the stats to only the specified mount points. # mount_points = ["/"] - + ## Ignore mount points by filesystem type. # ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] # fieldpass = ["free", "used", "used_percent"] @@ -520,7 +540,7 @@ # Dummy plugin to test out toml parsing happens properly [[inputs.file]] - interval = "24h" + interval = "24h" files = ["test.json"] data_format = "json" @@ -530,10 +550,10 @@ ## An array of urls to scrape metrics from. urls = $AZMON_TELEGRAF_CUSTOM_PROM_URLS - + ## An array of Kubernetes services to scrape metrics from. kubernetes_services = $AZMON_TELEGRAF_CUSTOM_PROM_K8S_SERVICES - + ## Scrape Kubernetes pods for the following prometheus annotations: ## - prometheus.io/scrape: Enable scraping for this pod ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to @@ -648,3 +668,18 @@ $AZMON_TELEGRAF_OSM_PROM_PLUGINS #[inputs.prometheus.tagpass] # operation_type = ["create_container", "remove_container", "pull_image"] +[[inputs.procstat]] + #name_prefix="t.azm.ms/" + exe = "mdsd" + interval = "60s" + pid_finder = "native" + pid_tag = true + name_override = "agent_telemetry" + fieldpass = ["cpu_usage", "memory_rss"] + [inputs.procstat.tags] + Computer = "$NODE_NAME" + AgentVersion = "$AGENT_VERSION" + ControllerType = "$CONTROLLER_TYPE" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" \ No newline at end of file diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index c3b675604..d095beb41 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -166,7 +166,7 @@ # timeout = "5s" ## Enable additional diagnostic logging. - # enable_diagnostic_logging = false + enable_diagnostic_logging = false ## Context Tag Sources add Application Insights context tags to a tag value. ## diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data index 985c73a17..9fc7ce08f 100644 --- a/build/linux/installer/datafiles/base_container.data +++ b/build/linux/installer/datafiles/base_container.data @@ -286,6 +286,8 @@ chmod 666 /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log touch /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log chmod 666 /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log +touch /var/opt/microsoft/docker-cimprov/log/appinsights_error.log +chmod 666 /var/opt/microsoft/docker-cimprov/log/appinsights_error.log touch /var/opt/microsoft/docker-cimprov/log/fluentd.log chmod 666 /var/opt/microsoft/docker-cimprov/log/fluentd.log @@ -309,7 +311,7 @@ if ${{PERFORMING_UPGRADE_NOT}}; then rmdir /etc/opt/microsoft/docker-cimprov/conf 2> /dev/null rmdir /etc/opt/microsoft/docker-cimprov 2> /dev/null rmdir /etc/opt/microsoft 2> /dev/null - rmdir /etc/opt 2> /dev/null + rmdir /etc/opt 2> /dev/null fi %Preinstall_0 diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml index 8e5513f91..66265d1b7 100644 --- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml @@ -174,6 +174,9 @@ spec: securityContext: privileged: true volumeMounts: + - mountPath: /hostfs + name: host-root + readOnly: true - mountPath: /etc/kubernetes/host name: azure-json-path - mountPath: /etc/omsagent-secret diff --git a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml index 1eaf7f652..eda0d8f97 100644 --- a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml @@ -88,6 +88,9 @@ spec: protocol: TCP name: in-rs-tcp volumeMounts: + - mountPath: /hostfs + name: host-root + readOnly: true - mountPath: /var/run/host name: docker-sock - mountPath: /var/log @@ -130,6 +133,9 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} volumes: + - name: host-root + hostPath: + path: / - name: docker-sock hostPath: path: /var/run diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 152f2313b..550ae58e9 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -492,6 +492,9 @@ spec: # securityContext: # privileged: true # volumeMounts: + # - mountPath: /hostfs + # name: host-root + # readOnly: true # - mountPath: /etc/kubernetes/host # name: azure-json-path # - mountPath: /etc/omsagent-secret @@ -648,6 +651,9 @@ spec: protocol: TCP name: in-rs-tcp volumeMounts: + - mountPath: /hostfs + name: host-root + readOnly: true - mountPath: /var/run/host name: docker-sock - mountPath: /var/log @@ -713,6 +719,9 @@ spec: - operator: "Exists" effect: "PreferNoSchedule" volumes: + - name: host-root + hostPath: + path: / - name: docker-sock hostPath: path: /var/run @@ -933,4 +942,3 @@ spec: names: plural: healthstates kind: HealthState - \ No newline at end of file diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb index 7691304a6..10f2c7926 100644 --- a/source/plugins/ruby/ApplicationInsightsUtility.rb +++ b/source/plugins/ruby/ApplicationInsightsUtility.rb @@ -98,6 +98,12 @@ def initializeUtility() elsif !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) + LogPath = "/var/opt/microsoft/docker-cimprov/log/appinsights_error.log" + if @@isWindows + LogPath = "/etc/omsagentwindows/appinsights_error.log" + end + logger = Logger.new(LogPath, 1, 2 * 1024 * 1024) + #override ai endpoint if its available otherwise use default. if appInsightsEndpoint && !appInsightsEndpoint.nil? && !appInsightsEndpoint.empty? $log.info("AppInsightsUtility: Telemetry client uses overrided endpoint url : #{appInsightsEndpoint}") @@ -105,20 +111,20 @@ def initializeUtility() #telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender) #telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue if !isProxyConfigured - sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint + sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, logger else $log.info("AppInsightsUtility: Telemetry client uses provided proxy configuration since proxy configured") - sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, @@proxy + sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, logger, @@proxy end queue = ApplicationInsights::Channel::AsynchronousQueue.new sender channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, channel else if !isProxyConfigured - sender = ApplicationInsights::Channel::AsynchronousSender.new + sender = ApplicationInsights::Channel::AsynchronousSender.new nil, logger else $log.info("AppInsightsUtility: Telemetry client uses provided proxy configuration since proxy configured") - sender = ApplicationInsights::Channel::AsynchronousSender.new nil, @@proxy + sender = ApplicationInsights::Channel::AsynchronousSender.new nil, logger, @@proxy end queue = ApplicationInsights::Channel::AsynchronousQueue.new sender channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue diff --git a/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb b/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb index 4786aa1d9..2cd08674f 100644 --- a/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb +++ b/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb @@ -20,9 +20,10 @@ class AsynchronousSender < SenderBase SERVICE_ENDPOINT_URI = 'https://dc.services.visualstudio.com/v2/track' # Initializes a new instance of the class. # @param [String] service_endpoint_uri the address of the service to send + # @param [Logger] instance of the logger to write the logs # @param [Hash] proxy server configuration to send (optional) # telemetry data to. - def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, proxy = {}) + def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, logger, proxy = {}) # callers which requires proxy dont require to maintain service endpoint uri which potentially can change if service_endpoint_uri.nil? || service_endpoint_uri.empty? service_endpoint_uri = SERVICE_ENDPOINT_URI @@ -33,7 +34,7 @@ def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, proxy = {}) @lock_work_thread = Mutex.new @work_thread = nil @start_notification_processed = true - super service_endpoint_uri, proxy + super service_endpoint_uri, logger, proxy end # The time span in seconds at which the the worker thread will check the diff --git a/source/plugins/ruby/lib/application_insights/channel/sender_base.rb b/source/plugins/ruby/lib/application_insights/channel/sender_base.rb index bedbae4ee..a6b7966db 100644 --- a/source/plugins/ruby/lib/application_insights/channel/sender_base.rb +++ b/source/plugins/ruby/lib/application_insights/channel/sender_base.rb @@ -1,9 +1,9 @@ -require 'yajl/json_gem' -require 'net/http' -require 'openssl' -require 'stringio' -require 'zlib' -require 'logger' +require "yajl/json_gem" +require "net/http" +require "openssl" +require "stringio" +require "zlib" +require "logger" module ApplicationInsights module Channel @@ -16,13 +16,18 @@ module Channel class SenderBase # Initializes a new instance of the class. # @param [String] service_endpoint_uri the address of the service to send + # @param [Logger] instance of the logger to write the logs # @param [Hash] proxy server configuration to send (optional) # telemetry data to. - def initialize(service_endpoint_uri, proxy = {}) + def initialize(service_endpoint_uri, logger, proxy = {}) @service_endpoint_uri = service_endpoint_uri @queue = nil @send_buffer_size = 100 - @logger = Logger.new(STDOUT) + if !logger.nil? && !logger.empty? + @logger = logger + else + @logger = Logger.new(STDOUT) + end @proxy = proxy end @@ -53,9 +58,9 @@ def initialize(service_endpoint_uri, proxy = {}) def send(data_to_send) uri = URI(@service_endpoint_uri) headers = { - 'Accept' => 'application/json', - 'Content-Type' => 'application/json; charset=utf-8', - 'Content-Encoding' => 'gzip' + "Accept" => "application/json", + "Content-Type" => "application/json; charset=utf-8", + "Content-Encoding" => "gzip", } request = Net::HTTP::Post.new(uri.path, headers) @@ -69,7 +74,7 @@ def send(data_to_send) else http = Net::HTTP.new(uri.hostname, uri.port, @proxy[:addr], @proxy[:port], @proxy[:user], @proxy[:pass]) end - if uri.scheme.downcase == 'https' + if uri.scheme.downcase == "https" http.use_ssl = true http.verify_mode = OpenSSL::SSL::VERIFY_PEER end @@ -78,7 +83,7 @@ def send(data_to_send) http.finish if http.started? if !response.kind_of? Net::HTTPSuccess - @logger.warn('application_insights') { "Failed to send data: #{response.message}" } + @logger.warn("application_insights") { "Failed to send data: #{response.message}" } end end From 4a5ef5ef0357126d9b91b7b828e6d88f6a0d281b Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 25 Jan 2022 13:22:57 -0800 Subject: [PATCH 3/9] write ai logs to file and telemetry for mdsd proc --- .../installer/conf/telegraf-prom-side-car.conf | 2 +- source/plugins/ruby/ApplicationInsightsUtility.rb | 15 ++++++++------- .../channel/asynchronous_sender.rb | 12 ++++++------ .../channel/synchronous_sender.rb | 5 +++-- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/build/linux/installer/conf/telegraf-prom-side-car.conf b/build/linux/installer/conf/telegraf-prom-side-car.conf index 25d2cd23b..ea3aafc17 100644 --- a/build/linux/installer/conf/telegraf-prom-side-car.conf +++ b/build/linux/installer/conf/telegraf-prom-side-car.conf @@ -202,4 +202,4 @@ $AZMON_TELEGRAF_OSM_PROM_PLUGINS AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" Region = "$TELEMETRY_AKS_REGION" - ContainerType = "$CONTAINER_TYPE" \ No newline at end of file + ContainerType = "$CONTAINER_TYPE" diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb index 10f2c7926..eb143c4ba 100644 --- a/source/plugins/ruby/ApplicationInsightsUtility.rb +++ b/source/plugins/ruby/ApplicationInsightsUtility.rb @@ -98,11 +98,12 @@ def initializeUtility() elsif !encodedAppInsightsKey.nil? decodedAppInsightsKey = Base64.decode64(encodedAppInsightsKey) - LogPath = "/var/opt/microsoft/docker-cimprov/log/appinsights_error.log" if @@isWindows - LogPath = "/etc/omsagentwindows/appinsights_error.log" + logPath = "/etc/omsagentwindows/appinsights_error.log" + else + logPath = "/var/opt/microsoft/docker-cimprov/log/appinsights_error.log" end - logger = Logger.new(LogPath, 1, 2 * 1024 * 1024) + aiLogger = Logger.new(logPath, 1, 2 * 1024 * 1024) #override ai endpoint if its available otherwise use default. if appInsightsEndpoint && !appInsightsEndpoint.nil? && !appInsightsEndpoint.empty? @@ -111,20 +112,20 @@ def initializeUtility() #telemetrySynchronousQueue = ApplicationInsights::Channel::SynchronousQueue.new(telemetrySynchronousSender) #telemetryChannel = ApplicationInsights::Channel::TelemetryChannel.new nil, telemetrySynchronousQueue if !isProxyConfigured - sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, logger + sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, aiLogger else $log.info("AppInsightsUtility: Telemetry client uses provided proxy configuration since proxy configured") - sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, logger, @@proxy + sender = ApplicationInsights::Channel::AsynchronousSender.new appInsightsEndpoint, aiLogger, @@proxy end queue = ApplicationInsights::Channel::AsynchronousQueue.new sender channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue @@Tc = ApplicationInsights::TelemetryClient.new decodedAppInsightsKey, channel else if !isProxyConfigured - sender = ApplicationInsights::Channel::AsynchronousSender.new nil, logger + sender = ApplicationInsights::Channel::AsynchronousSender.new nil, aiLogger else $log.info("AppInsightsUtility: Telemetry client uses provided proxy configuration since proxy configured") - sender = ApplicationInsights::Channel::AsynchronousSender.new nil, logger, @@proxy + sender = ApplicationInsights::Channel::AsynchronousSender.new nil, aiLogger, @@proxy end queue = ApplicationInsights::Channel::AsynchronousQueue.new sender channel = ApplicationInsights::Channel::TelemetryChannel.new nil, queue diff --git a/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb b/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb index 2cd08674f..df2138b3a 100644 --- a/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb +++ b/source/plugins/ruby/lib/application_insights/channel/asynchronous_sender.rb @@ -1,5 +1,5 @@ -require_relative 'sender_base' -require 'thread' +require_relative "sender_base" +require "thread" module ApplicationInsights module Channel @@ -17,13 +17,13 @@ module Channel # If no queue items are found for {#send_time} seconds, the worker thread # will shut down (and {#start} will need to be called again). class AsynchronousSender < SenderBase - SERVICE_ENDPOINT_URI = 'https://dc.services.visualstudio.com/v2/track' + SERVICE_ENDPOINT_URI = "https://dc.services.visualstudio.com/v2/track" # Initializes a new instance of the class. # @param [String] service_endpoint_uri the address of the service to send - # @param [Logger] instance of the logger to write the logs + # @param [Logger] instance of the logger to write the logs (optional) # @param [Hash] proxy server configuration to send (optional) # telemetry data to. - def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, logger, proxy = {}) + def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, logger = nil, proxy = {}) # callers which requires proxy dont require to maintain service endpoint uri which potentially can change if service_endpoint_uri.nil? || service_endpoint_uri.empty? service_endpoint_uri = SERVICE_ENDPOINT_URI @@ -131,7 +131,7 @@ def run rescue Exception => e # Make sure work_thread sets to nil when it terminates abnormally @work_thread = nil - @logger.error('application_insights') { "Asynchronous sender work thread terminated abnormally: #{e.to_s}" } + @logger.error("application_insights") { "Asynchronous sender work thread terminated abnormally: #{e.to_s}" } end end end diff --git a/source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb b/source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb index 597e97b9e..2bb212026 100644 --- a/source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb +++ b/source/plugins/ruby/lib/application_insights/channel/synchronous_sender.rb @@ -8,14 +8,15 @@ class SynchronousSender < SenderBase SERVICE_ENDPOINT_URI = "https://dc.services.visualstudio.com/v2/track" # Initializes a new instance of the class. # @param [String] service_endpoint_uri the address of the service to send + # @param [Logger] instance of the logger to write the logs (optional) # @param [Hash] proxy server configuration to send (optional) # telemetry data to. - def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, proxy = {}) + def initialize(service_endpoint_uri = SERVICE_ENDPOINT_URI, logger = nil, proxy = {}) # callers which requires proxy dont require to maintain service endpoint uri which potentially can change if service_endpoint_uri.nil? || service_endpoint_uri.empty? service_endpoint_uri = SERVICE_ENDPOINT_URI end - super service_endpoint_uri, proxy + super service_endpoint_uri, logger, proxy end end end From 04ead4c0a3b1de6a9c6d114dc79ac6ee34b638c4 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 25 Jan 2022 13:24:39 -0800 Subject: [PATCH 4/9] write ai logs to file and telemetry for mdsd proc --- build/linux/installer/conf/telegraf-rs.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index c7154a78e..f3d98f076 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -682,4 +682,4 @@ $AZMON_TELEGRAF_OSM_PROM_PLUGINS ControllerType = "$CONTROLLER_TYPE" AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" \ No newline at end of file + Region = "$TELEMETRY_AKS_REGION" From 5bda015c9fb1d69a05d2bec50f5e8a7a28d5c1d2 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 27 Jan 2022 18:19:21 -0800 Subject: [PATCH 5/9] fix pr feedback --- .../installer/conf/telegraf-prom-side-car.conf | 16 +++++++++++++++- build/linux/installer/conf/telegraf-rs.conf | 9 ++++++++- build/linux/installer/conf/telegraf.conf | 10 ++++++++-- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/build/linux/installer/conf/telegraf-prom-side-car.conf b/build/linux/installer/conf/telegraf-prom-side-car.conf index ea3aafc17..0a02b1e56 100644 --- a/build/linux/installer/conf/telegraf-prom-side-car.conf +++ b/build/linux/installer/conf/telegraf-prom-side-car.conf @@ -139,6 +139,20 @@ [processors.converter.fields] float = ["*"] +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### +[[aggregators.quantile]] + period = "30m" + drop_original = true + quantiles = [0.95] + algorithm = "t-digest" + compression = 100.0 + namepass = ["agent_telemetry"] + +############################################################################### +# INPUT PLUGINS # +############################################################################### # Dummy plugin to test out toml parsing happens properly [[inputs.file]] interval = "24h" @@ -188,7 +202,7 @@ $AZMON_TELEGRAF_CUSTOM_PROM_PLUGINS_WITH_NAMESPACE_FILTER $AZMON_TELEGRAF_OSM_PROM_PLUGINS [[inputs.procstat]] - #name_prefix="t.azm.ms/" + name_prefix="t.azm.ms/" exe = "mdsd" interval = "60s" pid_finder = "native" diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index f3d98f076..61ba144ae 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -313,6 +313,13 @@ ############################################################################### # AGGREGATOR PLUGINS # ############################################################################### +[[aggregators.quantile]] + period = "30m" + drop_original = true + quantiles = [0.95] + algorithm = "t-digest" + compression = 100.0 + namepass = ["agent_telemetry"] # # Keep the aggregate basicstats of each metric passing through. # [[aggregators.basicstats]] @@ -669,7 +676,7 @@ $AZMON_TELEGRAF_OSM_PROM_PLUGINS # operation_type = ["create_container", "remove_container", "pull_image"] [[inputs.procstat]] - #name_prefix="t.azm.ms/" + name_prefix="t.azm.ms/" exe = "mdsd" interval = "60s" pid_finder = "native" diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index d095beb41..45d040068 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -348,7 +348,13 @@ ############################################################################### # AGGREGATOR PLUGINS # ############################################################################### - +[[aggregators.quantile]] + period = "30m" + drop_original = true + quantiles = [0.95] + algorithm = "t-digest" + compression = 100.0 + namepass = ["agent_telemetry"] # # Keep the aggregate basicstats of each metric passing through. # [[aggregators.basicstats]] # ## General Aggregator Arguments: @@ -571,7 +577,7 @@ # taginclude = ["nodeName"] [[inputs.procstat]] - #name_prefix="t.azm.ms/" + name_prefix="t.azm.ms/" exe = "mdsd" interval = "60s" pid_finder = "native" From b10f4983be153b838cb9f55299d133473af7b315 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 27 Jan 2022 22:08:53 -0800 Subject: [PATCH 6/9] use name_prefix --- build/linux/installer/conf/telegraf-prom-side-car.conf | 6 +++--- build/linux/installer/conf/telegraf-rs.conf | 6 +++--- build/linux/installer/conf/telegraf.conf | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/build/linux/installer/conf/telegraf-prom-side-car.conf b/build/linux/installer/conf/telegraf-prom-side-car.conf index 0a02b1e56..8f807afde 100644 --- a/build/linux/installer/conf/telegraf-prom-side-car.conf +++ b/build/linux/installer/conf/telegraf-prom-side-car.conf @@ -109,7 +109,7 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["agent_telemetry", "file"] + namedrop = ["t.azm.ms/agent_telemetry", "file"] [[outputs.application_insights]] ## Instrumentation key of the Application Insights resource. @@ -128,7 +128,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["agent_telemetry"] + namepass = ["t.azm.ms/agent_telemetry"] #tagdrop = ["nodeName"] ############################################################################### @@ -148,7 +148,7 @@ quantiles = [0.95] algorithm = "t-digest" compression = 100.0 - namepass = ["agent_telemetry"] + namepass = ["t.azm.ms/agent_telemetry"] ############################################################################### # INPUT PLUGINS # diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index 61ba144ae..888b94b8b 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -121,7 +121,7 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["agent_telemetry", "file"] + namedrop = ["t.azm.ms/agent_telemetry", "file"] #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] [[outputs.application_insights]] @@ -141,7 +141,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["agent_telemetry"] + namepass = ["t.azm.ms/agent_telemetry"] #tagdrop = ["nodeName"] ############################################################################### @@ -319,7 +319,7 @@ quantiles = [0.95] algorithm = "t-digest" compression = 100.0 - namepass = ["agent_telemetry"] + namepass = ["t.azm.ms/agent_telemetry"] # # Keep the aggregate basicstats of each metric passing through. # [[aggregators.basicstats]] diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index 45d040068..feae7a599 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -120,7 +120,7 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["agent_telemetry", "file"] + namedrop = ["t.azm.ms/agent_telemetry", "file"] #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] # Output to send MDM metrics to fluent bit and then route it to fluentD @@ -175,7 +175,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["agent_telemetry"] + namepass = ["t.azm.ms/agent_telemetry"] #tagdrop = ["nodeName"] ############################################################################### @@ -354,7 +354,7 @@ quantiles = [0.95] algorithm = "t-digest" compression = 100.0 - namepass = ["agent_telemetry"] + namepass = ["t.azm.ms/agent_telemetry"] # # Keep the aggregate basicstats of each metric passing through. # [[aggregators.basicstats]] # ## General Aggregator Arguments: From 8cc5c01309a066ea50bd82b3843016817228c2f4 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 28 Jan 2022 16:35:15 -0800 Subject: [PATCH 7/9] remove mdsd telemetry changes --- .../conf/telegraf-prom-side-car.conf | 54 ++++++++-------- build/linux/installer/conf/telegraf-rs.conf | 46 +++++++------- build/linux/installer/conf/telegraf.conf | 62 +++++++++---------- .../templates/omsagent-daemonset.yaml | 3 - .../templates/omsagent-deployment.yaml | 6 -- kubernetes/omsagent.yaml | 18 ++---- 6 files changed, 86 insertions(+), 103 deletions(-) diff --git a/build/linux/installer/conf/telegraf-prom-side-car.conf b/build/linux/installer/conf/telegraf-prom-side-car.conf index 8f807afde..4519adfbe 100644 --- a/build/linux/installer/conf/telegraf-prom-side-car.conf +++ b/build/linux/installer/conf/telegraf-prom-side-car.conf @@ -109,26 +109,26 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["t.azm.ms/agent_telemetry", "file"] - -[[outputs.application_insights]] - ## Instrumentation key of the Application Insights resource. - instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" - - ## Timeout for closing (default: 5s). - # timeout = "5s" - - ## Enable additional diagnostic logging. - enable_diagnostic_logging = false - - ## Context Tag Sources add Application Insights context tags to a tag value. - ## - ## For list of allowed context tag keys see: - ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go - # [outputs.application_insights.context_tag_sources] - # "ai.cloud.role" = "kubernetes_container_name" - # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["t.azm.ms/agent_telemetry"] + namedrop = ["agent_telemetry", "file"] + +# [[outputs.application_insights]] +# ## Instrumentation key of the Application Insights resource. +# instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" + +# ## Timeout for closing (default: 5s). +# # timeout = "5s" + +# ## Enable additional diagnostic logging. +# enable_diagnostic_logging = false + +# ## Context Tag Sources add Application Insights context tags to a tag value. +# ## +# ## For list of allowed context tag keys see: +# ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go +# # [outputs.application_insights.context_tag_sources] +# # "ai.cloud.role" = "kubernetes_container_name" +# # "ai.cloud.roleInstance" = "kubernetes_pod_name" +# namepass = ["t.azm.ms/agent_telemetry"] #tagdrop = ["nodeName"] ############################################################################### @@ -142,13 +142,13 @@ ############################################################################### # AGGREGATOR PLUGINS # ############################################################################### -[[aggregators.quantile]] - period = "30m" - drop_original = true - quantiles = [0.95] - algorithm = "t-digest" - compression = 100.0 - namepass = ["t.azm.ms/agent_telemetry"] +# [[aggregators.quantile]] +# period = "30m" +# drop_original = true +# quantiles = [0.95] +# algorithm = "t-digest" +# compression = 100.0 +# namepass = ["agent_telemetry"] ############################################################################### # INPUT PLUGINS # diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index 888b94b8b..cdb2a1c65 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -121,27 +121,27 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["t.azm.ms/agent_telemetry", "file"] + namedrop = ["agent_telemetry", "file"] #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] -[[outputs.application_insights]] - ## Instrumentation key of the Application Insights resource. - instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" +# [[outputs.application_insights]] +# ## Instrumentation key of the Application Insights resource. +# instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" - ## Timeout for closing (default: 5s). - # timeout = "5s" +# ## Timeout for closing (default: 5s). +# # timeout = "5s" - ## Enable additional diagnostic logging. - enable_diagnostic_logging = false +# ## Enable additional diagnostic logging. +# enable_diagnostic_logging = false - ## Context Tag Sources add Application Insights context tags to a tag value. - ## - ## For list of allowed context tag keys see: - ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go - # [outputs.application_insights.context_tag_sources] - # "ai.cloud.role" = "kubernetes_container_name" - # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["t.azm.ms/agent_telemetry"] +# ## Context Tag Sources add Application Insights context tags to a tag value. +# ## +# ## For list of allowed context tag keys see: +# ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go +# # [outputs.application_insights.context_tag_sources] +# # "ai.cloud.role" = "kubernetes_container_name" +# # "ai.cloud.roleInstance" = "kubernetes_pod_name" +# namepass = ["t.azm.ms/agent_telemetry"] #tagdrop = ["nodeName"] ############################################################################### @@ -313,13 +313,13 @@ ############################################################################### # AGGREGATOR PLUGINS # ############################################################################### -[[aggregators.quantile]] - period = "30m" - drop_original = true - quantiles = [0.95] - algorithm = "t-digest" - compression = 100.0 - namepass = ["t.azm.ms/agent_telemetry"] +# [[aggregators.quantile]] +# period = "30m" +# drop_original = true +# quantiles = [0.95] +# algorithm = "t-digest" +# compression = 100.0 +# namepass = ["agent_telemetry"] # # Keep the aggregate basicstats of each metric passing through. # [[aggregators.basicstats]] diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index feae7a599..81110faa2 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -120,7 +120,7 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["t.azm.ms/agent_telemetry", "file"] + namedrop = ["agent_telemetry", "file"] #tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] # Output to send MDM metrics to fluent bit and then route it to fluentD @@ -158,15 +158,15 @@ namepass = ["container.azm.ms/disk"] #fieldpass = ["used_percent"] -[[outputs.application_insights]] - ## Instrumentation key of the Application Insights resource. - instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" +# [[outputs.application_insights]] +# ## Instrumentation key of the Application Insights resource. +# instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" - ## Timeout for closing (default: 5s). - # timeout = "5s" +# ## Timeout for closing (default: 5s). +# # timeout = "5s" - ## Enable additional diagnostic logging. - enable_diagnostic_logging = false +# ## Enable additional diagnostic logging. +# enable_diagnostic_logging = false ## Context Tag Sources add Application Insights context tags to a tag value. ## @@ -175,7 +175,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["t.azm.ms/agent_telemetry"] + namepass = ["agent_telemetry"] #tagdrop = ["nodeName"] ############################################################################### @@ -348,13 +348,13 @@ ############################################################################### # AGGREGATOR PLUGINS # ############################################################################### -[[aggregators.quantile]] - period = "30m" - drop_original = true - quantiles = [0.95] - algorithm = "t-digest" - compression = 100.0 - namepass = ["t.azm.ms/agent_telemetry"] +# [[aggregators.quantile]] +# period = "30m" +# drop_original = true +# quantiles = [0.95] +# algorithm = "t-digest" +# compression = 100.0 +# namepass = ["t.azm.ms/agent_telemetry"] # # Keep the aggregate basicstats of each metric passing through. # [[aggregators.basicstats]] # ## General Aggregator Arguments: @@ -576,21 +576,21 @@ #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] # taginclude = ["nodeName"] -[[inputs.procstat]] - name_prefix="t.azm.ms/" - exe = "mdsd" - interval = "60s" - pid_finder = "native" - pid_tag = true - name_override = "agent_telemetry" - fieldpass = ["cpu_usage", "memory_rss"] - [inputs.procstat.tags] - Computer = "$NODE_NAME" - AgentVersion = "$AGENT_VERSION" - ControllerType = "$CONTROLLER_TYPE" - AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" - ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" +# [[inputs.procstat]] +# name_prefix="t.azm.ms/" +# exe = "mdsd" +# interval = "60s" +# pid_finder = "native" +# pid_tag = true +# name_override = "agent_telemetry" +# fieldpass = ["cpu_usage", "memory_rss"] +# [inputs.procstat.tags] +# Computer = "$NODE_NAME" +# AgentVersion = "$AGENT_VERSION" +# ControllerType = "$CONTROLLER_TYPE" +# AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" +# ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" +# Region = "$TELEMETRY_AKS_REGION" # [[inputs.procstat]] # #name_prefix="container.azm.ms/" # exe = "ruby" diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml index 66265d1b7..8e5513f91 100644 --- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml @@ -174,9 +174,6 @@ spec: securityContext: privileged: true volumeMounts: - - mountPath: /hostfs - name: host-root - readOnly: true - mountPath: /etc/kubernetes/host name: azure-json-path - mountPath: /etc/omsagent-secret diff --git a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml index eda0d8f97..1eaf7f652 100644 --- a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml +++ b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml @@ -88,9 +88,6 @@ spec: protocol: TCP name: in-rs-tcp volumeMounts: - - mountPath: /hostfs - name: host-root - readOnly: true - mountPath: /var/run/host name: docker-sock - mountPath: /var/log @@ -133,9 +130,6 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} volumes: - - name: host-root - hostPath: - path: / - name: docker-sock hostPath: path: /var/run diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 550ae58e9..248276a08 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -357,7 +357,7 @@ spec: component: oms-agent tier: node annotations: - agentVersion: "1.10.0.1" + agentVersion: "azure-mdsd-1.14.2" dockerProviderVersion: "16.0.0-0" schema-versions: "v1" spec: @@ -492,9 +492,6 @@ spec: # securityContext: # privileged: true # volumeMounts: - # - mountPath: /hostfs - # name: host-root - # readOnly: true # - mountPath: /etc/kubernetes/host # name: azure-json-path # - mountPath: /etc/omsagent-secret @@ -599,7 +596,7 @@ spec: labels: rsName: "omsagent-rs" annotations: - agentVersion: "1.10.0.1" + agentVersion: "azure-mdsd-1.14.2" dockerProviderVersion: "16.0.0-0" schema-versions: "v1" spec: @@ -651,9 +648,6 @@ spec: protocol: TCP name: in-rs-tcp volumeMounts: - - mountPath: /hostfs - name: host-root - readOnly: true - mountPath: /var/run/host name: docker-sock - mountPath: /var/log @@ -719,9 +713,6 @@ spec: - operator: "Exists" effect: "PreferNoSchedule" volumes: - - name: host-root - hostPath: - path: / - name: docker-sock hostPath: path: /var/run @@ -774,7 +765,7 @@ spec: component: oms-agent-win tier: node-win annotations: - agentVersion: "1.10.0.1" + agentVersion: "0.0.0-0" dockerProviderVersion: "16.0.0-0" schema-versions: "v1" spec: @@ -789,7 +780,7 @@ spec: imagePullPolicy: IfNotPresent resources: limits: - cpu: 200m + cpu: 500m memory: 600Mi env: - name: FBIT_SERVICE_FLUSH_INTERVAL @@ -942,3 +933,4 @@ spec: names: plural: healthstates kind: HealthState + \ No newline at end of file From 766c0e8175c8e57e26d623ca1da2b38c8ed2ca5c Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 28 Jan 2022 16:44:38 -0800 Subject: [PATCH 8/9] remove mdsd telemetry changes --- .../conf/telegraf-prom-side-car.conf | 32 +++++++++---------- build/linux/installer/conf/telegraf-rs.conf | 30 ++++++++--------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/build/linux/installer/conf/telegraf-prom-side-car.conf b/build/linux/installer/conf/telegraf-prom-side-car.conf index 4519adfbe..a94150fad 100644 --- a/build/linux/installer/conf/telegraf-prom-side-car.conf +++ b/build/linux/installer/conf/telegraf-prom-side-car.conf @@ -201,19 +201,19 @@ $AZMON_TELEGRAF_CUSTOM_PROM_PLUGINS_WITH_NAMESPACE_FILTER ## OSM Prometheus configuration $AZMON_TELEGRAF_OSM_PROM_PLUGINS -[[inputs.procstat]] - name_prefix="t.azm.ms/" - exe = "mdsd" - interval = "60s" - pid_finder = "native" - pid_tag = true - name_override = "agent_telemetry" - fieldpass = ["cpu_usage", "memory_rss"] - [inputs.procstat.tags] - Computer = "$NODE_NAME" - AgentVersion = "$AGENT_VERSION" - ControllerType = "$CONTROLLER_TYPE" - AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" - ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" - ContainerType = "$CONTAINER_TYPE" +# [[inputs.procstat]] +# name_prefix="t.azm.ms/" +# exe = "mdsd" +# interval = "60s" +# pid_finder = "native" +# pid_tag = true +# name_override = "agent_telemetry" +# fieldpass = ["cpu_usage", "memory_rss"] +# [inputs.procstat.tags] +# Computer = "$NODE_NAME" +# AgentVersion = "$AGENT_VERSION" +# ControllerType = "$CONTROLLER_TYPE" +# AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" +# ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" +# Region = "$TELEMETRY_AKS_REGION" +# ContainerType = "$CONTAINER_TYPE" diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index cdb2a1c65..72fc25451 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -675,18 +675,18 @@ $AZMON_TELEGRAF_OSM_PROM_PLUGINS #[inputs.prometheus.tagpass] # operation_type = ["create_container", "remove_container", "pull_image"] -[[inputs.procstat]] - name_prefix="t.azm.ms/" - exe = "mdsd" - interval = "60s" - pid_finder = "native" - pid_tag = true - name_override = "agent_telemetry" - fieldpass = ["cpu_usage", "memory_rss"] - [inputs.procstat.tags] - Computer = "$NODE_NAME" - AgentVersion = "$AGENT_VERSION" - ControllerType = "$CONTROLLER_TYPE" - AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" - ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" - Region = "$TELEMETRY_AKS_REGION" +# [[inputs.procstat]] +# name_prefix="t.azm.ms/" +# exe = "mdsd" +# interval = "60s" +# pid_finder = "native" +# pid_tag = true +# name_override = "agent_telemetry" +# fieldpass = ["cpu_usage", "memory_rss"] +# [inputs.procstat.tags] +# Computer = "$NODE_NAME" +# AgentVersion = "$AGENT_VERSION" +# ControllerType = "$CONTROLLER_TYPE" +# AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" +# ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME" +# Region = "$TELEMETRY_AKS_REGION" From 55ac536420f54157cd14808504df52cefecb1ad3 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Fri, 28 Jan 2022 17:07:12 -0800 Subject: [PATCH 9/9] remove mdsd telemetry changes --- build/linux/installer/conf/telegraf.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index e96f1df10..9f213e3e8 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -175,7 +175,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["agent_telemetry"] + # namepass = ["agent_telemetry"] #tagdrop = ["nodeName"] ###############################################################################