diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf index f1e9cc282..3f2f65cff 100644 --- a/build/linux/installer/conf/telegraf-rs.conf +++ b/build/linux/installer/conf/telegraf-rs.conf @@ -611,3 +611,45 @@ $AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER # Computer = "placeholder_hostname" # ControllerType = "$CONTROLLER_TYPE" +##npm +[[inputs.prometheus]] + #name_prefix="container.azm.ms/" + ## An array of urls to scrape metrics from. + urls = ["$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER"] + fielddrop = ["$AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER"] + + metric_version = 2 + url_tag = "scrapeUrl" + + ## An array of Kubernetes services to scrape metrics from. + # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] + + ## Kubernetes config file to create client from. + # kube_config = "/path/to/kubernetes.config" + + ## Scrape Kubernetes pods for the following prometheus annotations: + ## - prometheus.io/scrape: Enable scraping for this pod + ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to + ## set this to `https` & most likely set the tls config. + ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. + ## - prometheus.io/port: If port is not 9102 use this annotation + # monitor_kubernetes_pods = true + + ## Use bearer token for authorization. 
('bearer_token' takes priority) + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Specify timeout duration for slower prometheus clients (default is 3s) + response_timeout = "15s" + + ## Optional TLS Config + tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + #tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + insecure_skip_verify = true + #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] + #[inputs.prometheus.tagpass] + # operation_type = ["create_container", "remove_container", "pull_image"] + diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf index b554dd4b3..19b6058be 100644 --- a/build/linux/installer/conf/telegraf.conf +++ b/build/linux/installer/conf/telegraf.conf @@ -703,6 +703,47 @@ insecure_skip_verify = true #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] +##npm +[[inputs.prometheus]] + #name_prefix="container.azm.ms/" + ## An array of urls to scrape metrics from. + urls = ["$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE"] + + metric_version = 2 + url_tag = "scrapeUrl" + + ## An array of Kubernetes services to scrape metrics from. + # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] + + ## Kubernetes config file to create client from. + # kube_config = "/path/to/kubernetes.config" + + ## Scrape Kubernetes pods for the following prometheus annotations: + ## - prometheus.io/scrape: Enable scraping for this pod + ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to + ## set this to `https` & most likely set the tls config. + ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. 
+ ## - prometheus.io/port: If port is not 9102 use this annotation + # monitor_kubernetes_pods = true + + ## Use bearer token for authorization. ('bearer_token' takes priority) + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Specify timeout duration for slower prometheus clients (default is 3s) + response_timeout = "15s" + + ## Optional TLS Config + tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + #tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + insecure_skip_verify = true + #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] + #[inputs.prometheus.tagpass] + # operation_type = ["create_container", "remove_container", "pull_image"] + # [[inputs.exec]] # ## Commands array # interval = "15m" diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data index f07e71b2d..fc5a6c8bc 100644 --- a/build/linux/installer/datafiles/base_container.data +++ b/build/linux/installer/datafiles/base_container.data @@ -125,6 +125,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/tomlparser.rb; build/common/installer/scripts/tomlparser.rb; 755; root; root /opt/td-agent-bit-conf-customizer.rb; build/common/installer/scripts/td-agent-bit-conf-customizer.rb; 755; root; root /opt/ConfigParseErrorLogger.rb; build/common/installer/scripts/ConfigParseErrorLogger.rb; 755; root; root +/opt/tomlparser-npm-config.rb; build/linux/installer/scripts/tomlparser-npm-config.rb; 755; root; root /opt/microsoft/omsagent/plugin/filter_cadvisor_health_container.rb; source/plugins/ruby/filter_cadvisor_health_container.rb; 644; root; root diff --git a/build/linux/installer/scripts/tomlparser-npm-config.rb b/build/linux/installer/scripts/tomlparser-npm-config.rb new file mode 100644 index 000000000..c5953836b --- 
#!/usr/local/bin/ruby

# Reads the optional "integrations" settings file mounted from the
# container-azm-ms-agentconfig configmap and writes the Azure Network Policy
# Manager (npm) metric scrape settings to the file
# "integration_npm_config_env_var" as shell `export` lines, so that they can be
# set as environment variables by the caller.

#this should be require relative in Linux and require in windows, since it is a gem install on windows
@os_type = ENV["OS_TYPE"]
if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
  require "tomlrb"
else
  require_relative "tomlrb"
end

require_relative "ConfigParseErrorLogger"

@configMapMountPath = "/etc/config/settings/integrations"
@configSchemaVersion = ""
@collect_basic_npm_metrics = false
@collect_advanced_npm_metrics = false
# $NODE_IP is written out literally (the export lines are unquoted);
# presumably the shell expands it when the generated file is sourced.
@npm_node_url = "http://$NODE_IP:10091/node-metrics"
@npm_cluster_url = "http://npm-metrics-cluster-service.kube-system:9000/cluster-metrics"
# Field dropped from the cluster scrape when only basic metrics are collected
@npm_basic_drop_metrics_cluster = "npm_ipset_counts"
@npm_env_var_file = "integration_npm_config_env_var"

# Use parser to parse the configmap toml file to a ruby structure.
# Returns the parsed hash (symbol keys), or nil when the file is not mounted
# or cannot be parsed (the parse error is logged and defaults are used).
def parseConfigMap
  begin
    # Check to see if config map is created
    if (File.file?(@configMapMountPath))
      puts "config::configmap container-azm-ms-agentconfig for npm metrics found, parsing values"
      parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
      puts "config::Successfully parsed mounted config map for npm metrics"
      return parsedConfig
    else
      puts "config::configmap container-azm-ms-agentconfig for npm metrics not mounted, using defaults"
      return nil
    end
  rescue => errorStr
    ConfigParseErrorLogger.logError("Exception while parsing config map for npm metrics: #{errorStr}, using defaults, please check config map for errors")
    return nil
  end
end

# Reads one flag from the [integrations.azure_network_policy_manager] table.
#
# parsedConfig - hash returned by parseConfigMap (may be nil)
# settingKey   - symbol naming the flag, e.g. :collect_basic_metrics
# settingLabel - short label ("basic"/"advanced") used in the error message
#
# Returns true only when the flag is present and its text is "true"
# (case-insensitive, surrounding whitespace ignored). Returns false when the
# flag is absent, has any other value, or reading it raises.
def readNpmMetricsSetting(parsedConfig, settingKey, settingLabel)
  begin
    integrations = parsedConfig[:integrations] unless parsedConfig.nil?
    npmSettings = integrations[:azure_network_policy_manager] unless integrations.nil?
    rawValue = npmSettings[settingKey] unless npmSettings.nil?
    return false if rawValue.nil?

    settingText = rawValue.to_s
    puts "got:integrations.azure_network_policy_manager.#{settingKey}='#{settingText}'"
    enabled = settingText.strip.casecmp("true") == 0
    puts "set:integrations.azure_network_policy_manager.#{settingKey}=#{enabled}"
    return enabled
  rescue => errorStr
    puts "config::error:Exception while reading config settings for npm #{settingLabel} setting - #{errorStr}, using defaults"
    return false
  end
end

# Use the ruby structure created after config parsing to set the right values to be used as environment variables
def populateSettingValuesFromConfigMap(parsedConfig)
  @collect_advanced_npm_metrics = readNpmMetricsSetting(parsedConfig, :collect_advanced_metrics, "advanced")
  @collect_basic_npm_metrics = readNpmMetricsSetting(parsedConfig, :collect_basic_metrics, "basic")
end

# Builds the `export` lines for the current settings.
# Advanced collection takes precedence over basic; when neither is enabled the
# three AZMON_* variables are exported empty and no telemetry marker is set.
def npmEnvVarExportLines
  telemetryLine = nil
  nodeUrl = ""
  clusterUrl = ""
  clusterDropList = ""

  if @collect_advanced_npm_metrics == true
    telemetryLine = "export TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED=1"
    nodeUrl = @npm_node_url
    clusterUrl = @npm_cluster_url
  elsif @collect_basic_npm_metrics == true
    telemetryLine = "export TELEMETRY_NPM_INTEGRATION_METRICS_BASIC=1"
    nodeUrl = @npm_node_url
    clusterUrl = @npm_cluster_url
    clusterDropList = @npm_basic_drop_metrics_cluster
  end

  exportLines = []
  exportLines << telemetryLine unless telemetryLine.nil?
  exportLines << "export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=#{nodeUrl}"
  exportLines << "export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=#{clusterUrl}"
  exportLines << "export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=#{clusterDropList}"
  return exportLines
end

@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
puts "****************Start Config Processing********************"
if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
  configMapSettings = parseConfigMap
  if !configMapSettings.nil?
    populateSettingValuesFromConfigMap(configMapSettings)
  end
else
  # Only complain about the schema version when a config map is actually mounted
  if (File.file?(@configMapMountPath))
    ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
  end
  @collect_basic_npm_metrics = false
  @collect_advanced_npm_metrics = false
end

# Write the settings to file, so that they can be set as environment variables.
# File.open raises (it never returns nil) when the file cannot be opened, so the
# failure is handled with rescue; the block form closes the file even if a
# write fails.
begin
  File.open(@npm_env_var_file, "w") do |file|
    npmEnvVarExportLines.each do |exportLine|
      file.write("#{exportLine}\n")
    end
  end
rescue => errorStr
  puts "Exception while opening file for writing config environment variables: #{errorStr}"
end
# Always emit the end marker, on success as well as on failure
puts "****************End Config Processing********************"
b/kubernetes/container-azm-ms-agentconfig.yaml @@ -100,6 +100,10 @@ data: container_memory_rss_threshold_percentage = 95.0 # Threshold for container memoryWorkingSet, metric will be sent only when memory working set exceeds or becomes equal to the following percentage container_memory_working_set_threshold_percentage = 95.0 + integrations: |- + [integrations.azure_network_policy_manager] + collect_basic_metrics = false + collect_advanced_metrics = false metadata: name: container-azm-ms-agentconfig namespace: kube-system diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index 92f4977d6..311470660 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -160,7 +160,7 @@ done source config_env_var -#Parse the configmap to set the right environment variables. +#Parse the configmap to set the right environment variables for health feature. /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-health-config.rb cat health_config_env_var | while read line; do @@ -169,6 +169,15 @@ cat health_config_env_var | while read line; do done source health_config_env_var +#Parse the configmap to set the right environment variables for network policy manager (npm) integration. +/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-npm-config.rb + +cat integration_npm_config_env_var | while read line; do + #echo $line + echo $line >> ~/.bashrc +done +source integration_npm_config_env_var + #Replace the placeholders in td-agent-bit.conf file for fluentbit with custom/default values in daemonset if [ ! 
-e "/etc/config/kube.conf" ]; then /opt/microsoft/omsagent/ruby/bin/ruby td-agent-bit-conf-customizer.rb diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb index 42ecfcaf0..13796cd1e 100644 --- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb +++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb @@ -33,6 +33,8 @@ class CAdvisorMetricsAPIClient @cAdvisorMetricsSecurePort = ENV["IS_SECURE_CADVISOR_PORT"] @containerLogsRoute = ENV["AZMON_CONTAINER_LOGS_ROUTE"] @hmEnabled = ENV["AZMON_CLUSTER_ENABLE_HEALTH_MODEL"] + @npmIntegrationBasic = ENV["TELEMETRY_NPM_INTEGRATION_METRICS_BASIC"] + @npmIntegrationAdvanced = ENV["TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED"] @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M @@ -250,7 +252,13 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met #telemetry about health model if (!@hmEnabled.nil? && !@hmEnabled.empty?) telemetryProps["hmEnabled"] = @hmEnabled - end + end + #telemetry for npm integration + if (!@npmIntegrationAdvanced.nil? && !@npmIntegrationAdvanced.empty?) + telemetryProps["int-npm-a"] = "1" + elsif (!@npmIntegrationBasic.nil? && !@npmIntegrationBasic.empty?) + telemetryProps["int-npm-b"] = "1" + end ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end end