microsoft · rashmichandrashekar · Jun 27, 2019 · Jun 25, 2019 · Jun 25, 2019 · Jun 25, 2019
diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
@@ -542,7 +542,7 @@
   ## An array of urls to scrape metrics from.
   #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
-  interval: "$AZMON_RS_PROM_INTERVAL"
+  interval = "$AZMON_RS_PROM_INTERVAL"
   ## An array of urls to scrape metrics from.
   urls = ["$AZMON_RS_PROM_URLS"]
 

diff --git a/installer/conf/telegraf-test-rs.conf b/installer/conf/telegraf-test-rs.conf
@@ -0,0 +1,113 @@
+# Telegraf Configuration
+#
+# Telegraf is entirely plugin driven. All metrics are gathered from the
+# declared inputs, and sent to the declared outputs.
+#
+# Plugins must be declared in here to be active.
+# To deactivate a plugin, comment out the name and any variables.
+#
+# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
+# file would generate.
+#
+# Environment variables can be used anywhere in this config file, simply prepend
+# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
+# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
+
+# Configuration for telegraf agent
+[agent]
+  ## Default data collection interval for all inputs
+  interval = "60s"
+  ## Rounds collection interval to 'interval'
+  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+  round_interval = true
+
+  ## Telegraf will send metrics to outputs in batches of at most
+  ## metric_batch_size metrics.
+  ## This controls the size of writes that Telegraf sends to output plugins.
+  metric_batch_size = 1000
+
+  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
+  ## output, and will flush this buffer on a successful write. Oldest metrics
+  ## are dropped first when this buffer fills.
+  ## This buffer only fills when writes fail to output plugin(s).
+  metric_buffer_limit = 10000
+
+  ## Collection jitter is used to jitter the collection by a random amount.
+  ## Each plugin will sleep for a random time within jitter before collecting.
+  ## This can be used to avoid many plugins querying things like sysfs at the
+  ## same time, which can have a measurable effect on the system.
+  collection_jitter = "0s"
+
+  ## Default flushing interval for all outputs. You shouldn't set this below
+  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
+  flush_interval = "60s"
+  ## Jitter the flush interval by a random amount. This is primarily to avoid
+  ## large write spikes for users running a large number of telegraf instances.
+  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
+  flush_jitter = "0s"
+
+  ## By default or when set to "0s", precision will be set to the same
+  ## timestamp order as the collection interval, with the maximum being 1s.
+  ##   ie, when interval = "10s", precision will be "1s"
+  ##       when interval = "250ms", precision will be "1ms"
+  ## Precision will NOT be used for service inputs. It is up to each individual
+  ## service input to set the timestamp at the appropriate precision.
+  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
+  precision = ""
+
+  ## Logging configuration:
+  ## Run telegraf with debug log messages.
+  debug = false
+  ## Run telegraf in quiet mode (error log messages only).
+  quiet = false
+  ## Specify the log file name. The empty string means to log to stderr.
+  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
+
+  ## Override default hostname, if empty use os.Hostname()
+  #hostname = "placeholder_hostname"
+  ## If set to true, do not set the "host" tag in the telegraf agent.
+  omit_hostname = true
+
+
+###############################################################################
+#                            INPUT PLUGINS                                    #
+###############################################################################
+
+# Prometheus custom metrics; settings are supplied through the $AZMON_RS_PROM_* placeholders
+[[inputs.prometheus]]
+  ## Data collection interval for this input (the [agent] interval above is only the default).
+  interval = "$AZMON_RS_PROM_INTERVAL"
+
+  ## An array of urls to scrape metrics from.
+  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  urls = ["$AZMON_RS_PROM_URLS"]
+
+  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
+  fieldpass = ["$AZMON_RS_PROM_FIELDPASS"]
+  fielddrop = ["$AZMON_RS_PROM_FIELDDROP"]
+
+  ## An array of Kubernetes services to scrape metrics from.
+  #kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
+  kubernetes_services = ["$AZMON_RS_PROM_K8S_SERVICES"]
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
+  monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## Use the bearer token stored at this path for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
diff --git a/installer/conf/telegraf-test.conf b/installer/conf/telegraf-test.conf
@@ -0,0 +1,100 @@
+# Telegraf Configuration
+#
+# Telegraf is entirely plugin driven. All metrics are gathered from the
+# declared inputs, and sent to the declared outputs.
+#
+# Plugins must be declared in here to be active.
+# To deactivate a plugin, comment out the name and any variables.
+#
+# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
+# file would generate.
+#
+# Environment variables can be used anywhere in this config file, simply prepend
+# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
+# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
+
+# Configuration for telegraf agent
+[agent]
+  ## Default data collection interval for all inputs
+  interval = "60s"
+  ## Rounds collection interval to 'interval'
+  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+  round_interval = true
+
+  ## Telegraf will send metrics to outputs in batches of at most
+  ## metric_batch_size metrics.
+  ## This controls the size of writes that Telegraf sends to output plugins.
+  metric_batch_size = 1000
+
+  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
+  ## output, and will flush this buffer on a successful write. Oldest metrics
+  ## are dropped first when this buffer fills.
+  ## This buffer only fills when writes fail to output plugin(s).
+  metric_buffer_limit = 10000
+
+  ## Collection jitter is used to jitter the collection by a random amount.
+  ## Each plugin will sleep for a random time within jitter before collecting.
+  ## This can be used to avoid many plugins querying things like sysfs at the
+  ## same time, which can have a measurable effect on the system.
+  collection_jitter = "0s"
+
+  ## Default flushing interval for all outputs. You shouldn't set this below
+  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
+  flush_interval = "60s"
+  ## Jitter the flush interval by a random amount. This is primarily to avoid
+  ## large write spikes for users running a large number of telegraf instances.
+  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
+  flush_jitter = "0s"
+
+  ## By default or when set to "0s", precision will be set to the same
+  ## timestamp order as the collection interval, with the maximum being 1s.
+  ##   ie, when interval = "10s", precision will be "1s"
+  ##       when interval = "250ms", precision will be "1ms"
+  ## Precision will NOT be used for service inputs. It is up to each individual
+  ## service input to set the timestamp at the appropriate precision.
+  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
+  precision = ""
+
+  ## Logging configuration:
+  ## Run telegraf with debug log messages.
+  debug = false
+  ## Run telegraf in quiet mode (error log messages only).
+  quiet = false
+  ## Specify the log file name. The empty string means to log to stderr.
+  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
+
+  ## Override default hostname, if empty use os.Hostname()
+  #hostname = "placeholder_hostname"
+  ## If set to true, do not set the "host" tag in the telegraf agent.
+  omit_hostname = true
+
+
+###############################################################################
+#                            INPUT PLUGINS                                    #
+###############################################################################
+
+# Prometheus custom metrics; settings are supplied through the $AZMON_DS_PROM_* placeholders
+[[inputs.prometheus]]
+  ## Data collection interval for this input (the [agent] interval above is only the default).
+  interval = "$AZMON_DS_PROM_INTERVAL"
+
+  ## An array of urls to scrape metrics from.
+  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  urls = ["$AZMON_DS_PROM_URLS"]
+
+  fieldpass = ["$AZMON_DS_PROM_FIELDPASS"]
+  fielddrop = ["$AZMON_DS_PROM_FIELDDROP"]
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## Use the bearer token stored at this path for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
@@ -572,7 +572,7 @@
 ## prometheus custom metrics
 [[inputs.prometheus]]
 
-  interval: "$AZMON_DS_PROM_INTERVAL"
+  interval = "$AZMON_DS_PROM_INTERVAL"
 
   ## An array of urls to scrape metrics from.
   urls = ["$AZMON_DS_PROM_URLS"]

diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
@@ -110,9 +110,12 @@ MAINTAINER:              'Microsoft Corporation'
 /etc/opt/microsoft/docker-cimprov/out_oms.conf;			            installer/conf/out_oms.conf;                    644; root; root
 /etc/opt/microsoft/docker-cimprov/telegraf.conf;			        installer/conf/telegraf.conf;                    644; root; root
 /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf;			        installer/conf/telegraf-rs.conf;                    644; root; root
-/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh;	        installer/scripts/TelegrafTCPErrorTelemetry.sh;      755; root; root
+/opt/telegraf-test.conf;			                                installer/conf/telegraf-test.conf;                    644; root; root
+/opt/telegraf-test-rs.conf;			                                installer/conf/telegraf-test-rs.conf;                    644; root; root
+/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh;	    installer/scripts/TelegrafTCPErrorTelemetry.sh;      755; root; root
 /opt/livenessprobe.sh;                                              installer/scripts/livenessprobe.sh;      755; root; root
 /opt/tomlparser.rb;                                                 installer/scripts/tomlparser.rb;     755; root; root 
+/opt/tomlparser-prom-customconfig.rb;                               installer/scripts/tomlparser-prom-customconfig.rb;     755; root; root 
 
 %Links
 /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root