From 939e323b2f6979e87e04985677818159c7528e97 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Mon, 14 Jan 2019 14:06:51 -0800 Subject: [PATCH 01/38] add configuration for telegraf --- installer/conf/telegraf.conf | 439 +++++++++++++++++++++++++++++++++++ 1 file changed, 439 insertions(+) create mode 100644 installer/conf/telegraf.conf diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf new file mode 100644 index 000000000..31619c45a --- /dev/null +++ b/installer/conf/telegraf.conf @@ -0,0 +1,439 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + # cluster = "$ACS_RESOURCE_NAME" + #node = $NODE_IP + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "60s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. 
+ metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = true + ## Specify the log file name. The empty string means to log to stderr. 
+ logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" + + ## Override default hostname, if empty use os.Hostname() + hostname = "$nodename" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Send aggregate metrics to Azure Monitor +[[outputs.azure_monitor]] + ## Timeout for HTTP writes. + # timeout = "20s" + + ## Set the namespace prefix, defaults to "Telegraf/". + namespace_prefix = "ContainerInsights/" + + ## Azure Monitor doesn't have a string value type, so convert string + ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows + ## a maximum of 10 dimensions so Telegraf will only send the first 10 + ## alphanumeric dimensions. + strings_as_dimensions = false + + ## Both region and resource_id must be set or be available via the + ## Instance Metadata service on Azure Virtual Machines. + # + ## Azure Region to publish metrics against. + ## ex: region = "southcentralus" + #region = "westeurope" + # + ## The Azure Resource ID against which metric will be logged, e.g. + ## ex: resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" + # resource_id = "" + + + +############################################################################### +# PROCESSOR PLUGINS # +############################################################################### + +# # Convert values to another metric value type +# [[processors.converter]] +# ## Tags to convert +# ## +# ## The table key determines the target type, and the array of key-values +# ## select the keys to convert. The array may contain globs. +# ## = [...] 
+# [processors.converter.tags] +# string = ["device"] +# integer = [] +# unsigned = [] +# boolean = [] +# float = [] +# +# ## Fields to convert +# ## +# ## The table key determines the target type, and the array of key-values +# ## select the keys to convert. The array may contain globs. +# ## = [...] +# [processors.converter.fields] +# tag = ["host"] +# string = [] +# integer = [] +# unsigned = [] +# boolean = [] +# float = [] + + +# # Map enum values according to given table. +# [[processors.enum]] +# [[processors.enum.mapping]] +# ## Name of the field to map +# field = "status" +# +# ## Destination field to be used for the mapped value. By default the source +# ## field is used, overwriting the original value. +# # dest = "status_code" +# +# ## Default value to be used for all values not contained in the mapping +# ## table. When unset, the unmodified value for the field will be used if no +# ## match is found. +# # default = 0 +# +# ## Table of mappings +# [processors.enum.mapping.value_mappings] +# green = 1 +# yellow = 2 +# red = 3 + + +# # Apply metric modifications using override semantics. +# [[processors.override]] +# ## All modifications on inputs and aggregators can be overridden: +# # name_override = "new_name" +# # name_prefix = "new_name_prefix" +# # name_suffix = "new_name_suffix" +# +# ## Tags to be added (all values must be strings) +# # [processors.override.tags] +# # additional_tag = "tag_value" + + +# # Parse a value in a specified field/tag(s) and add the result in a new metric +# [[processors.parser]] +# ## The name of the fields whose value will be parsed. +# parse_fields = [] +# +# ## If true, incoming metrics are not emitted. +# drop_original = false +# +# ## If set to override, emitted metrics will be merged by overriding the +# ## original metric using the newly parsed metrics. 
+# merge = "override" +# +# ## The dataformat to be read from files +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Print all metrics that pass through this filter. +# [[processors.printer]] + + +# # Transforms tag and field values with regex pattern +# [[processors.regex]] +# ## Tag and field conversions defined in a separate sub-tables +# # [[processors.regex.tags]] +# # ## Tag to change +# # key = "resp_code" +# # ## Regular expression to match on a tag value +# # pattern = "^(\\d)\\d\\d$" +# # ## Pattern for constructing a new value (${1} represents first subgroup) +# # replacement = "${1}xx" +# +# # [[processors.regex.fields]] +# # key = "request" +# # ## All the power of the Go regular expressions available here +# # ## For example, named subgroups +# # pattern = "^/api(?P/[\\w/]+)\\S*" +# # replacement = "${method}" +# # ## If result_key is present, a new field will be created +# # ## instead of changing existing field +# # result_key = "method" +# +# ## Multiple conversions may be applied for one field sequentially +# ## Let's extract one more value +# # [[processors.regex.fields]] +# # key = "request" +# # pattern = ".*category=(\\w+).*" +# # replacement = "${1}" +# # result_key = "search_category" + + +# # Rename measurements, tags, and fields that pass through this filter. 
+# [[processors.rename]] + + +# # Perform string processing on tags, fields, and measurements +# [[processors.strings]] +# ## Convert a tag value to uppercase +# # [[processors.strings.uppercase]] +# # tag = "method" +# +# ## Convert a field value to lowercase and store in a new field +# # [[processors.strings.lowercase]] +# # field = "uri_stem" +# # dest = "uri_stem_normalised" +# +# ## Trim leading and trailing whitespace using the default cutset +# # [[processors.strings.trim]] +# # field = "message" +# +# ## Trim leading characters in cutset +# # [[processors.strings.trim_left]] +# # field = "message" +# # cutset = "\t" +# +# ## Trim trailing characters in cutset +# # [[processors.strings.trim_right]] +# # field = "message" +# # cutset = "\r\n" +# +# ## Trim the given prefix from the field +# # [[processors.strings.trim_prefix]] +# # field = "my_value" +# # prefix = "my_" +# +# ## Trim the given suffix from the field +# # [[processors.strings.trim_suffix]] +# # field = "read_count" +# # suffix = "_count" + + +# # Print all metrics that pass through this filter. +# [[processors.topk]] +# ## How many seconds between aggregations +# # period = 10 +# +# ## How many top metrics to return +# # k = 10 +# +# ## Over which tags should the aggregation be done. Globs can be specified, in +# ## which case any tag matching the glob will aggregated over. If set to an +# ## empty list is no aggregation over tags is done +# # group_by = ['*'] +# +# ## Over which fields are the top k are calculated +# # fields = ["value"] +# +# ## What aggregation to use. Options: sum, mean, min, max +# # aggregation = "mean" +# +# ## Instead of the top k largest metrics, return the bottom k lowest metrics +# # bottomk = false +# +# ## The plugin assigns each metric a GroupBy tag generated from its name and +# ## tags. 
If this setting is different than "" the plugin will add a +# ## tag (which name will be the value of this setting) to each metric with +# ## the value of the calculated GroupBy tag. Useful for debugging +# # add_groupby_tag = "" +# +# ## These settings provide a way to know the position of each metric in +# ## the top k. The 'add_rank_field' setting allows to specify for which +# ## fields the position is required. If the list is non empty, then a field +# ## will be added to each and every metric for each string present in this +# ## setting. This field will contain the ranking of the group that +# ## the metric belonged to when aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_rank' +# # add_rank_fields = [] +# +# ## These settings provide a way to know what values the plugin is generating +# ## when aggregating metrics. The 'add_agregate_field' setting allows to +# ## specify for which fields the final aggregation value is required. If the +# ## list is non empty, then a field will be added to each every metric for +# ## each field present in this setting. This field will contain +# ## the computed aggregation for the group that the metric belonged to when +# ## aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_aggregate' +# # add_aggregate_fields = [] + + + +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### + +# # Keep the aggregate basicstats of each metric passing through. +# [[aggregators.basicstats]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. 
+# drop_original = false + + +# # Create aggregate histograms. +# [[aggregators.histogram]] +# ## The period in which to flush the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## Example config that aggregates all fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] +# # ## The name of metric. +# # measurement_name = "cpu" +# +# ## Example config that aggregates only specific fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] +# # ## The name of metric. +# # measurement_name = "diskio" +# # ## The concrete fields of metric +# # fields = ["io_time", "read_time", "write_time"] + + +# # Keep the aggregate min/max of each metric passing through. +# [[aggregators.minmax]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false + + +# # Count the occurance of values in fields. +# [[aggregators.valuecounter]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. 
+# drop_original = false +# ## The fields for which the values will be counted +# fields = [] + + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about cpu usage +#[[inputs.cpu]] + ## Whether to report per-cpu stats or not +# percpu = false + ## Whether to report total system cpu stats or not +# totalcpu = true + ## If true, collect raw CPU time metrics. +# collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states. +# report_active = true +# fieldpass = ["usage_active","cluster","node","host","device"] +# taginclude = ["cluster","cpu","node"] + + + +# Read metrics about disk usage by mount point +[[inputs.disk]] + ## By default stats will be gathered for all mount points. + ## Set mount_points will restrict the stats to only the specified mount points. + # mount_points = ["/"] + + ## Ignore mount points by filesystem type. + ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] + fieldpass = ["used_percent", "used", "free", "total"] + taginclude = ["device","fstype","mode","path","host"] + + +# Read metrics about memory usage +#[[inputs.mem]] +# fieldpass = ["used_percent", "cluster", "node","host","device"] +# taginclude = ["cluster","node"] + + +# Read metrics about network interface usage +[[inputs.net]] + ## By default, telegraf gathers stats from any up interface (excluding loopback) + ## Setting interfaces will tell it to gather these explicit interfaces, + ## regardless of status. + ## + # interfaces = ["eth0"] + ## + ## On linux systems telegraf also collects protocol stats. + ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. 
+ ## + ignore_protocol_stats = true + ## + fieldpass = ["bytes_sent","bytes_recv","err_in", "err_out"] + taginclude = ["interface","host"] + From 93f70b9261475ad2bb91002dd1350e4dafe488e9 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Mon, 14 Jan 2019 14:07:17 -0800 Subject: [PATCH 02/38] fix for perms --- installer/datafiles/base_container.data | 2 ++ 1 file changed, 2 insertions(+) diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 7181929e2..966fe44ee 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -91,6 +91,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root +/etc/telegraf/telegraf.conf; installer/conf/telegraf.conf; 644; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root @@ -130,6 +131,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit; 755; root; root;sysdir /opt/td-agent-bit/bin; 755; root; root;sysdir +/etc/telegraf; 755; root; root;sysdir /opt/microsoft/omsagent/plugin/lib; 755; root; root; sysdir /opt/microsoft/omsagent/plugin/lib/application_insights; 755; root; root; sysdir From a5f32b82a798b83596774e6948116602eb0068ef Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Mon, 14 Jan 2019 16:10:54 -0800 Subject: [PATCH 03/38] fix telegraf config. 
--- installer/conf/telegraf.conf | 42 +++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 31619c45a..6891ae138 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -235,7 +235,47 @@ # # Perform string processing on tags, fields, and measurements -# [[processors.strings]] +[[processors.strings]] + [[processors.strings.replace]] + measurement = "disk" + old = "disk" + new = "node" + [[processors.strings.replace]] + field = "free" + old = "free" + new = "diskFreeBytes" + [[processors.strings.replace]] + field = "used" + old = "used" + new = "diskUsedBytes" + [[processors.strings.replace]] + field = "total" + old = "total" + new = "diskTotalBytes" + [[processors.strings.replace]] + field = "used_percent" + old = "used_percent" + new = "diskUsedPercentage" + [[processors.strings.replace]] + measurement = "net" + old = "net" + new = "node" + [[processors.strings.replace]] + field = "bytes_recv" + old = "bytes_recv" + new = "networkBytesReceived" + [[processors.strings.replace]] + field = "bytes_sent" + old = "bytes_sent" + new = "networkBytesReceived" + [[processors.strings.replace]] + field = "err_in" + old = "err_in" + new = "networkErrorIn" + [[processors.strings.replace]] + field = "err_out" + old = "err_out" + new = "networkErrorOut" # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" From a6c2d2b69b6163deee6d737fbd3e4d6255bee416 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 15 Jan 2019 10:10:05 -0800 Subject: [PATCH 04/38] fix file location & config --- installer/conf/telegraf.conf | 10 +++++----- installer/datafiles/base_container.data | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 6891ae138..c2ae42793 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ 
-267,15 +267,15 @@ [[processors.strings.replace]] field = "bytes_sent" old = "bytes_sent" - new = "networkBytesReceived" + new = "networkBytesSent" [[processors.strings.replace]] field = "err_in" old = "err_in" - new = "networkErrorIn" + new = "networkErrorsIn" [[processors.strings.replace]] field = "err_out" old = "err_out" - new = "networkErrorOut" + new = "networkErrorsOut" # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -451,7 +451,7 @@ ## Ignore mount points by filesystem type. ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] - fieldpass = ["used_percent", "used", "free", "total"] + fieldpass = ["diskFreeBytes", "diskUsedBytes", "diskTotalBytes", "diskUsedPercentage"] taginclude = ["device","fstype","mode","path","host"] @@ -474,6 +474,6 @@ ## ignore_protocol_stats = true ## - fieldpass = ["bytes_sent","bytes_recv","err_in", "err_out"] + fieldpass = ["networkBytesReceived", "networkBytesSent", "networkErrorsIn", "networkErrorsOut"] taginclude = ["interface","host"] diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 966fe44ee..255e6ebfd 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -91,7 +91,7 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root -/etc/telegraf/telegraf.conf; installer/conf/telegraf.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root From cdfafaaaa9be8aac0b3581c5e6a7b66a7a8f16bc Mon Sep 17 00:00:00 
2001 From: Vishwanath Narasimhan Date: Tue, 15 Jan 2019 12:58:29 -0800 Subject: [PATCH 05/38] update to config --- installer/conf/telegraf.conf | 58 +++++++++++++++--------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index c2ae42793..f2e38e269 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -70,7 +70,7 @@ ## Run telegraf with debug log messages. debug = false ## Run telegraf in quiet mode (error log messages only). - quiet = true + quiet = false ## Specify the log file name. The empty string means to log to stderr. logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" @@ -235,47 +235,37 @@ # # Perform string processing on tags, fields, and measurements -[[processors.strings]] - [[processors.strings.replace]] +[[processors.rename]] + [[processors.rename.replace]] measurement = "disk" - old = "disk" - new = "node" - [[processors.strings.replace]] + dest = "node" + [[processors.rename.replace]] field = "free" - old = "free" - new = "diskFreeBytes" - [[processors.strings.replace]] + dest = "diskFreeBytes" + [[processors.rename.replace]] field = "used" - old = "used" - new = "diskUsedBytes" - [[processors.strings.replace]] + dest = "diskUsedBytes" + [[processors.rename.replace]] field = "total" - old = "total" - new = "diskTotalBytes" - [[processors.strings.replace]] + dest = "diskTotalBytes" + [[processors.rename.replace]] field = "used_percent" - old = "used_percent" - new = "diskUsedPercentage" - [[processors.strings.replace]] + dest = "diskUsedPercentage" + [[processors.rename.replace]] measurement = "net" - old = "net" - new = "node" - [[processors.strings.replace]] + dest = "node" + [[processors.rename.replace]] field = "bytes_recv" - old = "bytes_recv" - new = "networkBytesReceived" - [[processors.strings.replace]] + dest = "networkBytesReceived" + [[processors.rename.replace]] field = "bytes_sent" - old = "bytes_sent" - new = 
"networkBytesSent" - [[processors.strings.replace]] + dest = "networkBytesSent" + [[processors.rename.replace]] field = "err_in" - old = "err_in" - new = "networkErrorsIn" - [[processors.strings.replace]] + dest = "networkErrorsIn" + [[processors.rename.replace]] field = "err_out" - old = "err_out" - new = "networkErrorsOut" + dest = "networkErrorsOut" # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -451,7 +441,7 @@ ## Ignore mount points by filesystem type. ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] - fieldpass = ["diskFreeBytes", "diskUsedBytes", "diskTotalBytes", "diskUsedPercentage"] + fieldpass = ["free", "used", "total", "used_percent"] taginclude = ["device","fstype","mode","path","host"] @@ -474,6 +464,6 @@ ## ignore_protocol_stats = true ## - fieldpass = ["networkBytesReceived", "networkBytesSent", "networkErrorsIn", "networkErrorsOut"] + fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] taginclude = ["interface","host"] From 5668ce72dfeedad8e8debec69c28290b8459dba9 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 15 Jan 2019 13:38:14 -0800 Subject: [PATCH 06/38] fix namespace --- installer/conf/telegraf.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index f2e38e269..2bc4b3625 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -90,7 +90,7 @@ # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "ContainerInsights/" + namespace_prefix = "Container.Insights/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. 
Azure Monitor allows From 6b2472590f2df8af75eb0132f31ef4c6773ba790 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 15 Jan 2019 16:39:48 -0800 Subject: [PATCH 07/38] trying different namespace and also debug=true --- installer/conf/telegraf.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 2bc4b3625..9cbb287f6 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -68,7 +68,7 @@ ## Logging configuration: ## Run telegraf with debug log messages. - debug = false + debug = true ## Run telegraf in quiet mode (error log messages only). quiet = false ## Specify the log file name. The empty string means to log to stderr. @@ -90,7 +90,7 @@ # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Container.Insights/" + namespace_prefix = "Container.Insights3/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows From 2cc4cf976e1d9cc0da38a1806fc4f85ed7d76794 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 15 Jan 2019 19:40:17 -0800 Subject: [PATCH 08/38] add placeholder for nodename --- installer/conf/telegraf.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 9cbb287f6..1c38bbcfe 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -75,7 +75,7 @@ logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" ## Override default hostname, if empty use os.Hostname() - hostname = "$nodename" + hostname = "placeholder_hostname" ## If set to true, do no set the "host" tag in the telegraf agent. 
omit_hostname = false From 53b302c7f7e018c878163e1042fbab0461e6fad1 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 15 Jan 2019 19:48:50 -0800 Subject: [PATCH 09/38] change namespace --- installer/conf/telegraf.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 1c38bbcfe..21d23fd71 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -90,7 +90,7 @@ # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Container.Insights3/" + namespace_prefix = "Insights.Container/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows From dd7d618fe38cd9fdded687f34de056bd68ebda26 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 24 Jan 2019 14:44:47 -0800 Subject: [PATCH 10/38] updated config --- installer/conf/telegraf.conf | 87 +++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 21d23fd71..d94fcc4e4 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -90,7 +90,7 @@ # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Insights.Container/" + namespace_prefix = "Insights.Container2/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. 
Azure Monitor allows @@ -256,16 +256,43 @@ dest = "node" [[processors.rename.replace]] field = "bytes_recv" - dest = "networkBytesReceived" + dest = "networkBytesReceivedTotal" [[processors.rename.replace]] field = "bytes_sent" - dest = "networkBytesSent" + dest = "networkBytesSentTotal" [[processors.rename.replace]] field = "err_in" - dest = "networkErrorsIn" + dest = "networkErrorsInTotal" [[processors.rename.replace]] field = "err_out" - dest = "networkErrorsOut" + dest = "networkErrorsOutTotal" + [[processors.rename.replace]] + measurement = "diskio" + dest = "node" + [[processors.rename.replace]] + field = "iops_in_progress" + dest = "diskIopsInProgress" + [[processors.rename.replace]] + measurement = "kubernetes_pod_volume" + dest = "pod" + [[processors.rename.replace]] + field = "used_bytes" + dest = "podVolumeUsedBytes" + [[processors.rename.replace]] + field = "capacity_bytes" + dest = "podVolumeCapacityBytes" + [[processors.rename.replace]] + field = "available_bytes" + dest = "podVolumeAvailableBytes" + [[processors.rename.replace]] + measurement = "kubernetes_pod_network" + dest = "pod" + [[processors.rename.replace]] + field = "tx_errors" + dest = "podNetworkTxErrorsTotal" + [[processors.rename.replace]] + field = "rx_errors" + dest = "podNetworkRxErrorsTotal" # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -464,6 +491,54 @@ ## ignore_protocol_stats = true ## - fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] + #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] + fieldpass = ["err_in", "err_out"] taginclude = ["interface","host"] +# Read metrics about disk IO by device +[[inputs.diskio]] + ## By default, telegraf will gather stats for all devices including + ## disk partitions. + ## Setting devices will restrict the stats to the specified devices. + # devices = ["sda", "sdb", "vd*"] + ## Uncomment the following line if you need disk serial numbers. 
+ # skip_serial_number = false + # + ## On systems which support it, device metadata can be added in the form of + ## tags. + ## Currently only Linux is supported via udev properties. You can view + ## available properties for a device by running: + ## 'udevadm info -q property -n /dev/sda' + # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] + # + ## Using the same metadata source as device_tags, you can also customize the + ## name of the device via templates. + ## The 'name_templates' parameter is a list of templates to try and apply to + ## the device. The template may contain variables in the form of '$PROPERTY' or + ## '${PROPERTY}'. The first template which does not contain any variables not + ## present for the device is used as the device name tag. + ## The typical use case is for LVM volumes, to get the VG/LV name instead of + ## the near-meaningless DM-0 name. + # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] + fieldpass = ["iops_in_progress"] + taginclude = ["name","host"] +# Read metrics from the kubernetes kubelet api +[[inputs.kubernetes]] + ## URL for the kubelet + #url = "http://1.1.1.1:10255" + url = "http://placeholder_nodeip:10255/stats/summary" + + ## Use bearer token for authorization + # bearer_token = /path/to/bearer/token + + ## Set response_timeout (default 5 seconds) + # response_timeout = "5s" + + ## Optional TLS Config + # tls_ca = /path/to/cafile + # tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + fieldpass = ["used_bytes", "capacity_bytes", "available_bytes", "tx_errors", "rx_errors" ] + taginclude = ["volume_name","host","namespace","pod_name",] From 702e5082396d56d0bc9174bfb24832aee7b89844 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Fri, 25 Jan 2019 12:58:52 -0800 Subject: [PATCH 11/38] fix uri --- installer/conf/telegraf.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index d94fcc4e4..d7071db8b 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -90,7 +90,7 @@ # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Insights.Container2/" + namespace_prefix = "Insights.Containers/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows @@ -526,7 +526,7 @@ [[inputs.kubernetes]] ## URL for the kubelet #url = "http://1.1.1.1:10255" - url = "http://placeholder_nodeip:10255/stats/summary" + url = "http://placeholder_nodeip:10255" ## Use bearer token for authorization # bearer_token = /path/to/bearer/token From 34e374d2cd832b4c2dbe98d7d88a9bbc4dd40441 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 29 Jan 2019 09:55:26 -0800 Subject: [PATCH 12/38] fix azMon settings --- installer/conf/telegraf.conf | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index d7071db8b..85f438ca3 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -96,18 +96,25 @@ ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows ## a maximum of 10 dimensions so Telegraf will only send the first 10 ## alphanumeric dimensions. - strings_as_dimensions = false + strings_as_dimensions = true ## Both region and resource_id must be set or be available via the ## Instance Metadata service on Azure Virtual Machines. # ## Azure Region to publish metrics against. ## ex: region = "southcentralus" - #region = "westeurope" + region = "placeholder_region" # ## The Azure Resource ID against which metric will be logged, e.g. 
- ## ex: resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" - # resource_id = "" + #resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" + resource_id = "placeholder_resource_id" + + azure_tenant_id = "placeholder_azure_tenant_id" + + azure_client_id = "placeholder_azure_client_id" + + azure_client_secret = "placeholder_azure_client_secret" + From 0d1b3c13e6e9412b73a44c8ff0572d1e9a7778cc Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 29 Jan 2019 13:58:08 -0800 Subject: [PATCH 13/38] remove aad settings --- installer/conf/telegraf.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 85f438ca3..1eb0173e6 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -109,11 +109,11 @@ #resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" resource_id = "placeholder_resource_id" - azure_tenant_id = "placeholder_azure_tenant_id" + #azure_tenant_id = "placeholder_azure_tenant_id" - azure_client_id = "placeholder_azure_client_id" + #azure_client_id = "placeholder_azure_client_id" - azure_client_secret = "placeholder_azure_client_secret" + #azure_client_secret = "placeholder_azure_client_secret" From 36c46a29f687bb98967ab8748052fd89b9abdb56 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 30 Jan 2019 14:20:14 -0800 Subject: [PATCH 14/38] add custom metrics regions --- installer/conf/custom_metrics_regions.conf | 7 +++++++ installer/conf/telegraf.conf | 2 +- installer/datafiles/base_container.data | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 installer/conf/custom_metrics_regions.conf diff --git a/installer/conf/custom_metrics_regions.conf b/installer/conf/custom_metrics_regions.conf new file mode 100644 index 000000000..bf548abdd --- /dev/null +++ 
b/installer/conf/custom_metrics_regions.conf @@ -0,0 +1,7 @@ +eastus +southcentralus +westcentralus +westus2 +southeastasia +northeurope +westeurope \ No newline at end of file diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 1eb0173e6..216a7a250 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -90,7 +90,7 @@ # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Insights.Containers/" + namespace_prefix = "Insights.Container/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 255e6ebfd..155f5ef8c 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -92,6 +92,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/custom_metrics_regions.conf; installer/conf/custom_metrics_regions.conf; 644; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root From d1fc7114f0f1d90bb3191b82bd2ee0fc63441315 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Wed, 30 Jan 2019 18:57:34 -0800 Subject: [PATCH 15/38] fix config --- installer/conf/telegraf.conf | 65 ++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 216a7a250..12a42d493 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -245,7 +245,7 @@ 
[[processors.rename]] [[processors.rename.replace]] measurement = "disk" - dest = "node" + dest = "nodes" [[processors.rename.replace]] field = "free" dest = "diskFreeBytes" @@ -260,7 +260,7 @@ dest = "diskUsedPercentage" [[processors.rename.replace]] measurement = "net" - dest = "node" + dest = "nodes" [[processors.rename.replace]] field = "bytes_recv" dest = "networkBytesReceivedTotal" @@ -275,13 +275,13 @@ dest = "networkErrorsOutTotal" [[processors.rename.replace]] measurement = "diskio" - dest = "node" + dest = "nodes" [[processors.rename.replace]] field = "iops_in_progress" dest = "diskIopsInProgress" [[processors.rename.replace]] measurement = "kubernetes_pod_volume" - dest = "pod" + dest = "pods" [[processors.rename.replace]] field = "used_bytes" dest = "podVolumeUsedBytes" @@ -293,13 +293,19 @@ dest = "podVolumeAvailableBytes" [[processors.rename.replace]] measurement = "kubernetes_pod_network" - dest = "pod" + dest = "pods" [[processors.rename.replace]] field = "tx_errors" dest = "podNetworkTxErrorsTotal" [[processors.rename.replace]] field = "rx_errors" dest = "podNetworkRxErrorsTotal" + [[processors.rename.replace]] + measurement = "docker" + dest = "containers" + [[processors.rename.replace]] + measurement = "docker_container_status" + dest = "containers" # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -548,4 +554,51 @@ ## Use TLS but skip chain & host verification # insecure_skip_verify = false fieldpass = ["used_bytes", "capacity_bytes", "available_bytes", "tx_errors", "rx_errors" ] - taginclude = ["volume_name","host","namespace","pod_name",] + taginclude = ["volume_name","host","namespace","pod_name"] +# Read metrics about docker containers +[[inputs.docker]] + ## Docker Endpoint + ## To use TCP, set endpoint = "tcp://[ip]:[port]" + ## To use environment variables (ie, docker-machine), set endpoint = "ENV" + endpoint = "unix:///var/run/host/docker.sock" + + ## Set to true to collect Swarm 
metrics(desired_replicas, running_replicas) + gather_services = false + + ## Only collect metrics for these containers, collect all if empty + container_names = [] + + ## Containers to include and exclude. Globs accepted. + ## Note that an empty array for both will include all containers + container_name_include = [] + container_name_exclude = [] + + ## Container states to include and exclude. Globs accepted. + ## When empty only containers in the "running" state will be captured. + container_state_include = ['*'] + # container_state_exclude = [] + + ## Timeout for docker list, info, and stats commands + timeout = "5s" + + ## Whether to report for each container per-device blkio (8:0, 8:1...) and + ## network (eth0, eth1, ...) stats or not + perdevice = true + ## Whether to report for each container total blkio and network stats or not + total = true + ## Which environment variables should we use as a tag + ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] + + ## docker labels to include and exclude as tags. Globs accepted. 
+ ## Note that an empty array for both will include all labels as tags + docker_label_include = [] + docker_label_exclude = [] + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images" ,"oomkilled", "exitcode" ] + taginclude = ["host", "container_name", "container_status", "container_image"] \ No newline at end of file From e68bba5b07565c00e3521ffef916ee16cd8dc290 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 7 Feb 2019 20:13:31 -0800 Subject: [PATCH 16/38] add support for replica-set config --- installer/conf/telegraf-rs.conf | 528 ++++++++++++++++++++++++ installer/conf/telegraf.conf | 84 ++-- installer/datafiles/base_container.data | 1 + 3 files changed, 569 insertions(+), 44 deletions(-) create mode 100644 installer/conf/telegraf-rs.conf diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf new file mode 100644 index 000000000..fc8abfe26 --- /dev/null +++ b/installer/conf/telegraf-rs.conf @@ -0,0 +1,528 @@ +# Telegraf Configuration +# +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. +# +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. +# +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. +# +# Environment variables can be used anywhere in this config file, simply prepend +# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), +# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) + + +# Global tags can be specified here in key="value" format. 
+[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + # cluster = "$ACS_RESOURCE_NAME" + #node = $NODE_IP + AgentVersion = "$AGENT_VERSION" + AKS_RESOURCE_ID = "$AKS_RESOURCE_ID" + Region = "$AKS_REGION" + ClusterName = "$AKS_CLUSTER_NAME" + ClusterType = "AKS" + Computer = "placeholder_hostname" + ControllerType = "$CONTROLLER_TYPE" + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "60s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. 
+ ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = true + ## Run telegraf in quiet mode (error log messages only). + quiet = false + ## Specify the log file name. The empty string means to log to stderr. + logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" + + ## Override default hostname, if empty use os.Hostname() + #hostname = "placeholder_hostname" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = true + + +############################################################################### +# OUTPUT PLUGINS # +############################################################################### + +# Send aggregate metrics to Azure Monitor +[[outputs.azure_monitor]] + ## Timeout for HTTP writes. + # timeout = "20s" + + ## Set the namespace prefix, defaults to "Telegraf/". + namespace_prefix = "Insights.Container/" + + ## Azure Monitor doesn't have a string value type, so convert string + ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows + ## a maximum of 10 dimensions so Telegraf will only send the first 10 + ## alphanumeric dimensions. + strings_as_dimensions = true + + ## Both region and resource_id must be set or be available via the + ## Instance Metadata service on Azure Virtual Machines. + # + ## Azure Region to publish metrics against. 
+ ## ex: region = "southcentralus" + region = "placeholder_region" + # + ## The Azure Resource ID against which metric will be logged, e.g. + #resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" + resource_id = "placeholder_resource_id" + + #azure_tenant_id = "placeholder_azure_tenant_id" + + #azure_client_id = "placeholder_azure_client_id" + + #azure_client_secret = "placeholder_azure_client_secret" + + #namepass = ["nodes", "pods", "containers","prometheus"] + namedrop = ["filestat"] + tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] + +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$APPLICATIONINSIGHTS_KEY" + + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. + # enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. + ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["filestat"] + +############################################################################### +# PROCESSOR PLUGINS # +############################################################################### + +# # Convert values to another metric value type +# [[processors.converter]] +# ## Tags to convert +# ## +# ## The table key determines the target type, and the array of key-values +# ## select the keys to convert. The array may contain globs. +# ## = [...] 
+# [processors.converter.tags] +# string = ["device"] +# integer = [] +# unsigned = [] +# boolean = [] +# float = [] +# +# ## Fields to convert +# ## +# ## The table key determines the target type, and the array of key-values +# ## select the keys to convert. The array may contain globs. +# ## = [...] +# [processors.converter.fields] +# tag = ["host"] +# string = [] +# integer = [] +# unsigned = [] +# boolean = [] +# float = [] + + +# # Map enum values according to given table. +# [[processors.enum]] +# [[processors.enum.mapping]] +# ## Name of the field to map +# field = "status" +# +# ## Destination field to be used for the mapped value. By default the source +# ## field is used, overwriting the original value. +# # dest = "status_code" +# +# ## Default value to be used for all values not contained in the mapping +# ## table. When unset, the unmodified value for the field will be used if no +# ## match is found. +# # default = 0 +# +# ## Table of mappings +# [processors.enum.mapping.value_mappings] +# green = 1 +# yellow = 2 +# red = 3 + + +# # Apply metric modifications using override semantics. +# [[processors.override]] +# ## All modifications on inputs and aggregators can be overridden: +# # name_override = "new_name" +# # name_prefix = "new_name_prefix" +# # name_suffix = "new_name_suffix" +# +# ## Tags to be added (all values must be strings) +# # [processors.override.tags] +# # additional_tag = "tag_value" + + +# # Parse a value in a specified field/tag(s) and add the result in a new metric +# [[processors.parser]] +# ## The name of the fields whose value will be parsed. +# parse_fields = [] +# +# ## If true, incoming metrics are not emitted. +# drop_original = false +# +# ## If set to override, emitted metrics will be merged by overriding the +# ## original metric using the newly parsed metrics. 
+# merge = "override" +# +# ## The dataformat to be read from files +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +# data_format = "influx" + + +# # Print all metrics that pass through this filter. +# [[processors.printer]] + + +# # Transforms tag and field values with regex pattern +# [[processors.regex]] +# ## Tag and field conversions defined in a separate sub-tables +# # [[processors.regex.tags]] +# # ## Tag to change +# # key = "resp_code" +# # ## Regular expression to match on a tag value +# # pattern = "^(\\d)\\d\\d$" +# # ## Pattern for constructing a new value (${1} represents first subgroup) +# # replacement = "${1}xx" +# +# # [[processors.regex.fields]] +# # key = "request" +# # ## All the power of the Go regular expressions available here +# # ## For example, named subgroups +# # pattern = "^/api(?P/[\\w/]+)\\S*" +# # replacement = "${method}" +# # ## If result_key is present, a new field will be created +# # ## instead of changing existing field +# # result_key = "method" +# +# ## Multiple conversions may be applied for one field sequentially +# ## Let's extract one more value +# # [[processors.regex.fields]] +# # key = "request" +# # pattern = ".*category=(\\w+).*" +# # replacement = "${1}" +# # result_key = "search_category" + + +# # Rename measurements, tags, and fields that pass through this filter. 
+# [[processors.rename]] + + +# # Perform string processing on tags, fields, and measurements +[[processors.rename]] + [[processors.rename.replace]] + measurement = "kubernetes_daemonset" + dest = "daemonsets" + [[processors.rename.replace]] + measurement = "kubernetes_deployment" + dest = "deployments" + [[processors.rename.replace]] + measurement = "kubernetes_node" + dest = "nodes" + [[processors.rename.replace]] + tag = "node_name" + dest = "host" + [[processors.rename.replace]] + measurement = "kubernetes_pod_container" + dest = "containers" + +# ## Convert a tag value to uppercase +# # [[processors.strings.uppercase]] +# # tag = "method" +# +# ## Convert a field value to lowercase and store in a new field +# # [[processors.strings.lowercase]] +# # field = "uri_stem" +# # dest = "uri_stem_normalised" +# +# ## Trim leading and trailing whitespace using the default cutset +# # [[processors.strings.trim]] +# # field = "message" +# +# ## Trim leading characters in cutset +# # [[processors.strings.trim_left]] +# # field = "message" +# # cutset = "\t" +# +# ## Trim trailing characters in cutset +# # [[processors.strings.trim_right]] +# # field = "message" +# # cutset = "\r\n" +# +# ## Trim the given prefix from the field +# # [[processors.strings.trim_prefix]] +# # field = "my_value" +# # prefix = "my_" +# +# ## Trim the given suffix from the field +# # [[processors.strings.trim_suffix]] +# # field = "read_count" +# # suffix = "_count" + + +# # Print all metrics that pass through this filter. +# [[processors.topk]] +# ## How many seconds between aggregations +# # period = 10 +# +# ## How many top metrics to return +# # k = 10 +# +# ## Over which tags should the aggregation be done. Globs can be specified, in +# ## which case any tag matching the glob will aggregated over. 
If set to an +# ## empty list is no aggregation over tags is done +# # group_by = ['*'] +# +# ## Over which fields are the top k are calculated +# # fields = ["value"] +# +# ## What aggregation to use. Options: sum, mean, min, max +# # aggregation = "mean" +# +# ## Instead of the top k largest metrics, return the bottom k lowest metrics +# # bottomk = false +# +# ## The plugin assigns each metric a GroupBy tag generated from its name and +# ## tags. If this setting is different than "" the plugin will add a +# ## tag (which name will be the value of this setting) to each metric with +# ## the value of the calculated GroupBy tag. Useful for debugging +# # add_groupby_tag = "" +# +# ## These settings provide a way to know the position of each metric in +# ## the top k. The 'add_rank_field' setting allows to specify for which +# ## fields the position is required. If the list is non empty, then a field +# ## will be added to each and every metric for each string present in this +# ## setting. This field will contain the ranking of the group that +# ## the metric belonged to when aggregated over that field. +# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_rank' +# # add_rank_fields = [] +# +# ## These settings provide a way to know what values the plugin is generating +# ## when aggregating metrics. The 'add_agregate_field' setting allows to +# ## specify for which fields the final aggregation value is required. If the +# ## list is non empty, then a field will be added to each every metric for +# ## each field present in this setting. This field will contain +# ## the computed aggregation for the group that the metric belonged to when +# ## aggregated over that field. 
+# ## The name of the field will be set to the name of the aggregation field, +# ## suffixed with the string '_topk_aggregate' +# # add_aggregate_fields = [] + + + +############################################################################### +# AGGREGATOR PLUGINS # +############################################################################### + +# # Keep the aggregate basicstats of each metric passing through. +# [[aggregators.basicstats]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false + + +# # Create aggregate histograms. +# [[aggregators.histogram]] +# ## The period in which to flush the aggregator. +# period = "30s" +# +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# +# ## Example config that aggregates all fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] +# # ## The name of metric. +# # measurement_name = "cpu" +# +# ## Example config that aggregates only specific fields of the metric. +# # [[aggregators.histogram.config]] +# # ## The set of buckets. +# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] +# # ## The name of metric. +# # measurement_name = "diskio" +# # ## The concrete fields of metric +# # fields = ["io_time", "read_time", "write_time"] + + +# # Keep the aggregate min/max of each metric passing through. +# [[aggregators.minmax]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. 
+# drop_original = false + + +# # Count the occurance of values in fields. +# [[aggregators.valuecounter]] +# ## General Aggregator Arguments: +# ## The period on which to flush & clear the aggregator. +# period = "30s" +# ## If true, the original metric will be dropped by the +# ## aggregator and will not get sent to the output plugins. +# drop_original = false +# ## The fields for which the values will be counted +# fields = [] + + + +############################################################################### +# INPUT PLUGINS # +############################################################################### + +# Read metrics about cpu usage +#[[inputs.cpu]] + ## Whether to report per-cpu stats or not +# percpu = false + ## Whether to report total system cpu stats or not +# totalcpu = true + ## If true, collect raw CPU time metrics. +# collect_cpu_time = false + ## If true, compute and report the sum of all non-idle CPU states. +# report_active = true +# fieldpass = ["usage_active","cluster","node","host","device"] +# taginclude = ["cluster","cpu","node"] + + + + # Read metrics from one or many prometheus clients +#[[inputs.prometheus]] + ## An array of urls to scrape metrics from. +# urls = ["https://$METRICS_SERVER_SERVICE_HOST/metrics"] + + ## An array of Kubernetes services to scrape metrics from. + # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] + + ## Kubernetes config file to create client from. + # kube_config = "/path/to/kubernetes.config" + + ## Scrape Kubernetes pods for the following prometheus annotations: + ## - prometheus.io/scrape: Enable scraping for this pod + ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to + ## set this to 'https' & most likely set the tls config. + ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. 
+ ## - prometheus.io/port: If port is not 9102 use this annotation + # monitor_kubernetes_pods = true + + ## Use bearer token for authorization +# bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + ## Specify timeout duration for slower prometheus clients (default is 3s) +# response_timeout = "15s" + + ## Optional TLS Config +# tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + # tls_cert = /path/to/certfile + # tls_key = /path/to/keyfile + ## Use TLS but skip chain & host verification +# insecure_skip_verify = true + # Read stats about given file(s) +[[inputs.filestat]] + ## Files to gather stats about. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". See https://github.com/gobwas/glob. + files = ["/var/opt/microsoft/docker-cimprov/log/telegraf.log"] + ## If true, read the entire file and calculate an md5 checksum. + md5 = false +[[inputs.kube_inventory]] + ## URL for the Kubernetes API + #url = "https://127.0.0.1" + url = "$K8SSERVICEHOST" + + ## Namespace to use + # namespace = "default" + + ## Use bearer token for authorization. ('bearer_token' takes priority) + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + ## OR + # bearer_token_string = "abc_123" + + ## Set response_timeout (default 5 seconds) + response_timeout = "15s" + + ## Optional Resources to exclude from gathering + ## Leave them with blank with try to gather everything available. + ## Values can be - "daemonsets", deployments", "nodes", "persistentvolumes", + ## "persistentvolumeclaims", "pods", "statefulsets" + # resource_exclude = [ "deployments", "nodes", "statefulsets" ] + + ## Optional Resources to include when gathering + ## Overrides resource_exclude if both set. 
+ # resource_include = [ "deployments", "nodes", "statefulsets" ] + + ## Optional TLS Config + tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + # tls_cert = "/path/to/certfile" + # tls_key = "/path/to/keyfile" + ## Use TLS but skip chain & host verification + insecure_skip_verify = true + + fieldpass = ["current_number_scheduled", "desired_number_scheduled", "number_available", "number_unavailable", "number_ready", "replicas_available", "replicas_unavailable", "capacity_cpu_cores", "capacity_memory_bytes", "capacity_pods", "allocatable_pods", "allocatable_cpu_cores", "allocatable_memory_bytes", "restarts_total","resource_requests_cpu_units", "resource_requests_memory_bytes", "resource_limits_cpu_units", "resource_limits_memory_bytes"] + taginclude = ["node_name", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name"] \ No newline at end of file diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 12a42d493..2652ae82b 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -22,6 +22,13 @@ # user = "$USER" # cluster = "$ACS_RESOURCE_NAME" #node = $NODE_IP + AgentVersion = "$AGENT_VERSION" + AKS_RESOURCE_ID = "$AKS_RESOURCE_ID" + Region = "$AKS_REGION" + ClusterName = "$AKS_CLUSTER_NAME" + ClusterType = "AKS" + Computer = "placeholder_hostname" + ControllerType = "$CONTROLLER_TYPE" # Configuration for telegraf agent @@ -115,8 +122,28 @@ #azure_client_secret = "placeholder_azure_client_secret" + #namepass = ["nodes", "pods", "containers","prometheus"] + namedrop = ["filestat"] + tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] +[[outputs.application_insights]] + ## Instrumentation key of the Application Insights resource. + instrumentation_key = "$APPLICATIONINSIGHTS_KEY" + ## Timeout for closing (default: 5s). + # timeout = "5s" + + ## Enable additional diagnostic logging. 
+ # enable_diagnostic_logging = false + + ## Context Tag Sources add Application Insights context tags to a tag value. + ## + ## For list of allowed context tag keys see: + ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go + # [outputs.application_insights.context_tag_sources] + # "ai.cloud.role" = "kubernetes_container_name" + # "ai.cloud.roleInstance" = "kubernetes_pod_name" + namepass = ["filestat"] ############################################################################### # PROCESSOR PLUGINS # @@ -252,9 +279,6 @@ [[processors.rename.replace]] field = "used" dest = "diskUsedBytes" - [[processors.rename.replace]] - field = "total" - dest = "diskTotalBytes" [[processors.rename.replace]] field = "used_percent" dest = "diskUsedPercentage" @@ -273,21 +297,12 @@ [[processors.rename.replace]] field = "err_out" dest = "networkErrorsOutTotal" - [[processors.rename.replace]] - measurement = "diskio" - dest = "nodes" - [[processors.rename.replace]] - field = "iops_in_progress" - dest = "diskIopsInProgress" [[processors.rename.replace]] measurement = "kubernetes_pod_volume" dest = "pods" [[processors.rename.replace]] field = "used_bytes" dest = "podVolumeUsedBytes" - [[processors.rename.replace]] - field = "capacity_bytes" - dest = "podVolumeCapacityBytes" [[processors.rename.replace]] field = "available_bytes" dest = "podVolumeAvailableBytes" @@ -306,6 +321,7 @@ [[processors.rename.replace]] measurement = "docker_container_status" dest = "containers" + # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -481,8 +497,8 @@ ## Ignore mount points by filesystem type. 
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] - fieldpass = ["free", "used", "total", "used_percent"] - taginclude = ["device","fstype","mode","path","host"] + fieldpass = ["free", "used", "used_percent"] + taginclude = ["device","path","host"] # Read metrics about memory usage @@ -508,33 +524,6 @@ fieldpass = ["err_in", "err_out"] taginclude = ["interface","host"] -# Read metrics about disk IO by device -[[inputs.diskio]] - ## By default, telegraf will gather stats for all devices including - ## disk partitions. - ## Setting devices will restrict the stats to the specified devices. - # devices = ["sda", "sdb", "vd*"] - ## Uncomment the following line if you need disk serial numbers. - # skip_serial_number = false - # - ## On systems which support it, device metadata can be added in the form of - ## tags. - ## Currently only Linux is supported via udev properties. You can view - ## available properties for a device by running: - ## 'udevadm info -q property -n /dev/sda' - # device_tags = ["ID_FS_TYPE", "ID_FS_USAGE"] - # - ## Using the same metadata source as device_tags, you can also customize the - ## name of the device via templates. - ## The 'name_templates' parameter is a list of templates to try and apply to - ## the device. The template may contain variables in the form of '$PROPERTY' or - ## '${PROPERTY}'. The first template which does not contain any variables not - ## present for the device is used as the device name tag. - ## The typical use case is for LVM volumes, to get the VG/LV name instead of - ## the near-meaningless DM-0 name. 
- # name_templates = ["$ID_FS_LABEL","$DM_VG_NAME/$DM_LV_NAME"] - fieldpass = ["iops_in_progress"] - taginclude = ["name","host"] # Read metrics from the kubernetes kubelet api [[inputs.kubernetes]] ## URL for the kubelet @@ -553,7 +542,7 @@ # tls_key = /path/to/keyfile ## Use TLS but skip chain & host verification # insecure_skip_verify = false - fieldpass = ["used_bytes", "capacity_bytes", "available_bytes", "tx_errors", "rx_errors" ] + fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] taginclude = ["volume_name","host","namespace","pod_name"] # Read metrics about docker containers [[inputs.docker]] @@ -600,5 +589,12 @@ # tls_key = "/etc/telegraf/key.pem" ## Use TLS but skip chain & host verification # insecure_skip_verify = false - fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images" ,"oomkilled", "exitcode" ] - taginclude = ["host", "container_name", "container_status", "container_image"] \ No newline at end of file + fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images" ,"oomkilled" ] + taginclude = ["host", "container_name", "container_status", "container_image"] +[[inputs.filestat]] + ## Files to gather stats about. + ## These accept standard unix glob matching rules, but with the addition of + ## ** as a "super asterisk". See https://github.com/gobwas/glob. + files = ["/var/opt/microsoft/docker-cimprov/log/telegraf.log"] + ## If true, read the entire file and calculate an md5 checksum. 
+ md5 = false diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 155f5ef8c..f3075c64d 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -92,6 +92,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/custom_metrics_regions.conf; installer/conf/custom_metrics_regions.conf; 644; root; root %Links From 185c4cafc867f34073a6bc4b2fc8f80bbc6e93cf Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 7 Feb 2019 23:42:07 -0800 Subject: [PATCH 17/38] fix oomkilled --- installer/conf/telegraf.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 2652ae82b..6dfcdaeac 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -589,7 +589,7 @@ # tls_key = "/etc/telegraf/key.pem" ## Use TLS but skip chain & host verification # insecure_skip_verify = false - fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images" ,"oomkilled" ] + fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] taginclude = ["host", "container_name", "container_status", "container_image"] [[inputs.filestat]] ## Files to gather stats about. 
From e76ef3af26dc88be2cc71bfda8f647a1430a4e26 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 01:53:02 -0800 Subject: [PATCH 18/38] Add telegraf 403 metric telemetry & non 403 trace telemetry --- installer/conf/td-agent-bit-rs.conf | 25 ++++++++++++++++++++ installer/conf/td-agent-bit.conf | 18 ++++++++++++--- installer/conf/telegraf-rs.conf | 29 ++++++++++++++++++++---- installer/conf/telegraf.conf | 28 +++++++++++++++++++---- installer/datafiles/base_container.data | 2 ++ installer/scripts/Telegraf403Telemery.sh | 3 +++ source/code/go/src/plugins/out_oms.go | 12 ++++++---- source/code/go/src/plugins/telemetry.go | 6 +++-- 8 files changed, 105 insertions(+), 18 deletions(-) create mode 100644 installer/conf/td-agent-bit-rs.conf create mode 100644 installer/scripts/Telegraf403Telemery.sh diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf new file mode 100644 index 000000000..19239708b --- /dev/null +++ b/installer/conf/td-agent-bit-rs.conf @@ -0,0 +1,25 @@ +[SERVICE] + Flush 30 + Log_Level info + Parsers_File /etc/td-agent-bit/parsers.conf + Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log + +[INPUT] + Name tail + Tag oms.container.log.telegraf.err.* + Path /var/opt/microsoft/docker-cimprov/log/telegraf.log + DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db + Mem_Buf_Limit 30m + Path_Key filepath + Skip_Long_Lines On + +[FILTER] + Name grep + Match oms.container.log.telegraf.err.* + Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ + +[OUTPUT] + Name oms + EnableTelemetry true + TelemetryPushIntervalSeconds 300 + Match oms.container.log.* \ No newline at end of file diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 29c98bdf1..9771a4c96 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -23,10 +23,22 @@ Path_Key filepath Skip_Long_Lines On +[INPUT] + Name tail + 
Tag oms.container.log.telegraf.err.* + Path /var/opt/microsoft/docker-cimprov/log/telegraf.log + DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db + Mem_Buf_Limit 30m + Path_Key filepath + Skip_Long_Lines On + +[FILTER] + Name grep + Match oms.container.log.telegraf.err.* + Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ + [OUTPUT] Name oms EnableTelemetry true TelemetryPushIntervalSeconds 300 - Match oms.container.log.* - AgentVersion ciprod01092019 - + Match oms.container.log.* \ No newline at end of file diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index fc8abfe26..85d2d008b 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -74,9 +74,9 @@ ## Logging configuration: ## Run telegraf with debug log messages. - debug = true + debug = false ## Run telegraf in quiet mode (error log messages only). - quiet = false + quiet = true ## Specify the log file name. The empty string means to log to stderr. logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" @@ -122,7 +122,7 @@ #azure_client_secret = "placeholder_azure_client_secret" #namepass = ["nodes", "pods", "containers","prometheus"] - namedrop = ["filestat"] + namedrop = ["filestat", "telegraf_telemetry"] tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] [[outputs.application_insights]] @@ -142,7 +142,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["filestat"] + namepass = ["filestat", "telegraf_telemetry"] ############################################################################### # PROCESSOR PLUGINS # @@ -486,6 +486,7 @@ # Read stats about given file(s) [[inputs.filestat]] ## Files to gather stats about. 
+ interval = "15m" ## These accept standard unix glob matching rules, but with the addition of ## ** as a "super asterisk". See https://github.com/gobwas/glob. files = ["/var/opt/microsoft/docker-cimprov/log/telegraf.log"] @@ -525,4 +526,22 @@ insecure_skip_verify = true fieldpass = ["current_number_scheduled", "desired_number_scheduled", "number_available", "number_unavailable", "number_ready", "replicas_available", "replicas_unavailable", "capacity_cpu_cores", "capacity_memory_bytes", "capacity_pods", "allocatable_pods", "allocatable_cpu_cores", "allocatable_memory_bytes", "restarts_total","resource_requests_cpu_units", "resource_requests_memory_bytes", "resource_limits_cpu_units", "resource_limits_memory_bytes"] - taginclude = ["node_name", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name"] \ No newline at end of file + taginclude = ["node_name", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name"] +[[inputs.exec]] + ## Commands array + interval = "15m" + commands = [ + "/opt/microsoft/docker-cimprov/bin/Telegraf403Telemetry.sh" + ] + + ## Timeout for each command to complete. + timeout = "15s" + + ## measurement name suffix (for separating different commands) + name_suffix = "_telemetry" + + ## Data format to consume. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" \ No newline at end of file diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 6dfcdaeac..e35bb1cd0 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -75,9 +75,9 @@ ## Logging configuration: ## Run telegraf with debug log messages. - debug = true + debug = false ## Run telegraf in quiet mode (error log messages only). - quiet = false + quiet = true ## Specify the log file name. 
The empty string means to log to stderr. logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" @@ -123,7 +123,7 @@ #azure_client_secret = "placeholder_azure_client_secret" #namepass = ["nodes", "pods", "containers","prometheus"] - namedrop = ["filestat"] + namedrop = ["filestat", "telegraf_telemetry"] tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] [[outputs.application_insights]] @@ -143,7 +143,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["filestat"] + namepass = ["filestat", "telegraf_telemetry"] ############################################################################### # PROCESSOR PLUGINS # @@ -593,8 +593,28 @@ taginclude = ["host", "container_name", "container_status", "container_image"] [[inputs.filestat]] ## Files to gather stats about. + interval = "15m" ## These accept standard unix glob matching rules, but with the addition of ## ** as a "super asterisk". See https://github.com/gobwas/glob. files = ["/var/opt/microsoft/docker-cimprov/log/telegraf.log"] ## If true, read the entire file and calculate an md5 checksum. md5 = false +[[inputs.exec]] + ## Commands array + interval = "15m" + commands = [ + "/opt/microsoft/docker-cimprov/bin/Telegraf403Telemetry.sh" + ] + + ## Timeout for each command to complete. + timeout = "15s" + + ## measurement name suffix (for separating different commands) + name_suffix = "_telemetry" + + ## Data format to consume. 
+ ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "influx" + diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index f3075c64d..e7a3323aa 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -90,10 +90,12 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/td-agent-bit-rs.conf; installer/conf/td-agent-bit-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/custom_metrics_regions.conf; installer/conf/custom_metrics_regions.conf; 644; root; root +/opt/microsoft/docker-cimprov/bin/Telegraf403Telemetry.sh; installer/scripts/Telegraf403Telemetry.sh; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root diff --git a/installer/scripts/Telegraf403Telemery.sh b/installer/scripts/Telegraf403Telemery.sh new file mode 100644 index 000000000..3754538c6 --- /dev/null +++ b/installer/scripts/Telegraf403Telemery.sh @@ -0,0 +1,3 @@ +#!/bin/sh +count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l) +echo "telegraf,AKS_RESOURCE_ID=$AKS_RESOURCE_ID, 403count=$count403" \ No newline at end of file diff --git a/source/code/go/src/plugins/out_oms.go 
b/source/code/go/src/plugins/out_oms.go index 133e0f039..7747fc7bb 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -2,11 +2,13 @@ package main import ( "github.com/fluent/fluent-bit-go/output" + "github.com/Microsoft/ApplicationInsights-Go/appinsights" ) import ( "C" "strings" "unsafe" + "os" ) //export FLBPluginRegister @@ -19,7 +21,7 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { // ctx (context) pointer to fluentbit context (state/ c code) func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") - agentVersion := output.FLBPluginConfigKey(ctx, "AgentVersion") + agentVersion := os.Getenv("AGENT_VERSION") InitializePlugin(ContainerLogPluginConfFilePath, agentVersion) enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { @@ -51,9 +53,11 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { records = append(records, record) } - incomingTag := C.GoString(tag) - if strings.Contains(strings.ToLower(incomingTag), "oms.container.log.flbplugin") { - return PushToAppInsightsTraces(records) + incomingTag := strings.ToLower(C.GoString(tag)) + if strings.Contains(incomingTag, "oms.container.log.flbplugin") { + return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) + } else if strings.Contains(incomingTag, "oms.container.log.telegraf.err") { + return PushToAppInsightsTraces(records, appinsights.Error, incomingTag) } return PostDataHelper(records) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 82f970d3a..9e8dd057c 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -159,13 +159,15 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance 
-func PushToAppInsightsTraces(records []map[interface{}]interface{}) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel int, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) } traceEntry := strings.Join(logLines, "\n") - TelemetryClient.TrackTrace(traceEntry, 1) + traceTelemetryItem := appinsights.NewTraceTelemetry(traceEntry, severityLevel) + traceTelemetryItem.Properties["tag"] = tag + TelemetryClient.Track(traceTelemetryItem) return output.FLB_OK } From 57a2797cb24b021e3bdda118b24f645eec198c50 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 02:01:00 -0800 Subject: [PATCH 19/38] fix type --- source/code/go/src/plugins/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 9e8dd057c..03a787354 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -159,7 +159,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel int, tag string) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) From 68db7f3cf4684dde41286afa81b29b0122195460 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 02:04:47 -0800 Subject: [PATCH 20/38] fix package --- source/code/go/src/plugins/telemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 03a787354..0b2feec59 100644 --- 
a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -159,7 +159,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel appinsights.contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) From 1d2bd74e6da4b16ffdde0c73b2317ec4d38f8275 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 02:09:31 -0800 Subject: [PATCH 21/38] fix package import --- source/code/go/src/plugins/telemetry.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index 0b2feec59..acf97042b 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -9,6 +9,7 @@ import ( "time" "github.com/Microsoft/ApplicationInsights-Go/appinsights" + "github.com/Microsoft/ApplicationInsights-Go/appinsights/contracts" "github.com/fluent/fluent-bit-go/output" ) @@ -159,7 +160,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { } // PushToAppInsightsTraces sends the log lines as trace messages to the configured App Insights Instance -func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel appinsights.contracts.SeverityLevel, tag string) int { +func PushToAppInsightsTraces(records []map[interface{}]interface{}, severityLevel contracts.SeverityLevel, tag string) int { var logLines []string for _, record := range records { logLines = append(logLines, ToString(record["log"])) From 9fa30b7dad1de8a0d473693eae8108b971caa4f4 Mon Sep 17 00:00:00 2001 From: 
Vishwanath Narasimhan Date: Sun, 10 Feb 2019 02:16:24 -0800 Subject: [PATCH 22/38] fix filename --- installer/scripts/Telegraf403Telemetry.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 installer/scripts/Telegraf403Telemetry.sh diff --git a/installer/scripts/Telegraf403Telemetry.sh b/installer/scripts/Telegraf403Telemetry.sh new file mode 100644 index 000000000..3754538c6 --- /dev/null +++ b/installer/scripts/Telegraf403Telemetry.sh @@ -0,0 +1,3 @@ +#!/bin/sh +count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l) +echo "telegraf,AKS_RESOURCE_ID=$AKS_RESOURCE_ID, 403count=$count403" \ No newline at end of file From 7979e7c5e6887e3a52f85936303970b87e7c4125 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 09:18:10 -0800 Subject: [PATCH 23/38] delete unused file --- installer/scripts/Telegraf403Telemery.sh | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 installer/scripts/Telegraf403Telemery.sh diff --git a/installer/scripts/Telegraf403Telemery.sh b/installer/scripts/Telegraf403Telemery.sh deleted file mode 100644 index 3754538c6..000000000 --- a/installer/scripts/Telegraf403Telemery.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l) -echo "telegraf,AKS_RESOURCE_ID=$AKS_RESOURCE_ID, 403count=$count403" \ No newline at end of file From 95e7b9945e74298157ee51f30cce9eafe00bfad8 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 12:31:31 -0800 Subject: [PATCH 24/38] conf file for rs; fix 403counttotal metric for telegraf, remove host and use nodeName consistently, rename metrics --- installer/conf/out_oms-rs.conf | 6 ++ installer/conf/telegraf-rs.conf | 88 +++++++++++++++++++++-- installer/conf/telegraf.conf | 39 ++++++++-- installer/datafiles/base_container.data | 1 + 
installer/scripts/Telegraf403Telemetry.sh | 4 +- source/code/go/src/plugins/oms.go | 3 +- source/code/go/src/plugins/out_oms.go | 8 ++- 7 files changed, 132 insertions(+), 17 deletions(-) create mode 100644 installer/conf/out_oms-rs.conf diff --git a/installer/conf/out_oms-rs.conf b/installer/conf/out_oms-rs.conf new file mode 100644 index 000000000..e3a32a526 --- /dev/null +++ b/installer/conf/out_oms-rs.conf @@ -0,0 +1,6 @@ +omsadmin_conf_path=/etc/opt/microsoft/omsagent/conf/omsadmin.conf +cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt +key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key +container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname +container_inventory_refresh_interval=86400 +kube_system_containers_refresh_interval=86400 diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index 85d2d008b..b01f380e2 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -275,16 +275,91 @@ [[processors.rename.replace]] measurement = "kubernetes_deployment" dest = "deployments" + [[processors.rename.replace]] + measurement = "kubernetes_deployment" + dest = "statefulsets" [[processors.rename.replace]] measurement = "kubernetes_node" dest = "nodes" - [[processors.rename.replace]] - tag = "node_name" - dest = "host" [[processors.rename.replace]] measurement = "kubernetes_pod_container" dest = "containers" - + [[processors.rename.replace]] + field = "current_number_scheduled" + dest = "currentNumberScheduled" + [[processors.rename.replace]] + field = "desired_number_scheduled" + dest = "desiredNumberScheduled" + [[processors.rename.replace]] + field = "number_available" + dest = "numberAvailable" + [[processors.rename.replace]] + field = "number_unavailable" + dest = "numUnavailable" + [[processors.rename.replace]] + field = "number_ready" + dest = "numReady" + [[processors.rename.replace]] + field = "replicas_available" + dest = "numReplicasAvailable" + 
[[processors.rename.replace]] + field = "replicas_unavailable" + dest = "numReplicasUnavailable" + [[processors.rename.replace]] + field = "capacity_cpu_cores" + dest = "capacityCpuCores" + [[processors.rename.replace]] + field = "capacity_memory_bytes" + dest = "capacityMemoryBytes" + [[processors.rename.replace]] + field = "capacity_pods" + dest = "capacityNumPods" + [[processors.rename.replace]] + field = "allocatable_pods" + dest = "allocatableNumPods" + [[processors.rename.replace]] + field = "allocatable_cpu_cores" + dest = "allocatableCpuCores" + [[processors.rename.replace]] + field = "allocatable_memory_bytes" + dest = "allocatableMemoryBytes" + [[processors.rename.replace]] + field = "restarts_total" + dest = "restartsTotal" + [[processors.rename.replace]] + field = "resource_requests_cpu_units" + dest = "resourceRequestsCpuUnits" + [[processors.rename.replace]] + field = "resource_requests_memory_bytes" + dest = "resourceRequestsMemoryBytes" + [[processors.rename.replace]] + field = "resource_limits_cpu_units" + dest = "resourceLimitsCpuUnits" + [[processors.rename.replace]] + field = "resource_limits_memory_bytes" + dest = "resourceLimitsMemoryBytes" + [[processors.rename.replace]] + field = "spec_replicas" + dest = "numSpecReplicas" + [[processors.rename.replace]] + field = "replicas_current" + dest = "numCurrentReplicas" + [[processors.rename.replace]] + field = "replicas_ready" + dest = "numReadyReplicas" + [[processors.rename.replace]] + tag = "daemonset_name" + dest = "daemonsetName" + [[processors.rename.replace]] + tag = "deployment_name" + dest = "deploymentName" + [[processors.rename.replace]] + tag = "container_name" + dest = "containerName" + [[processors.rename.replace]] + tag = "node_name" + dest = "nodeName" + # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -525,8 +600,9 @@ ## Use TLS but skip chain & host verification insecure_skip_verify = true - fieldpass = ["current_number_scheduled", 
"desired_number_scheduled", "number_available", "number_unavailable", "number_ready", "replicas_available", "replicas_unavailable", "capacity_cpu_cores", "capacity_memory_bytes", "capacity_pods", "allocatable_pods", "allocatable_cpu_cores", "allocatable_memory_bytes", "restarts_total","resource_requests_cpu_units", "resource_requests_memory_bytes", "resource_limits_cpu_units", "resource_limits_memory_bytes"] - taginclude = ["node_name", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name"] + namepass = ["kubernetes_daemonset", "kubernetes_deployment", "kubernetes_node", "kubernetes_pod_container", "kubernetes_statefulset"] + fieldpass = ["current_number_scheduled", "desired_number_scheduled", "number_available", "number_unavailable", "number_ready", "replicas_available", "replicas_unavailable", "capacity_cpu_cores", "capacity_memory_bytes", "capacity_pods", "allocatable_pods", "allocatable_cpu_cores", "allocatable_memory_bytes", "restarts_total","resource_requests_cpu_units", "resource_requests_memory_bytes", "resource_limits_cpu_units", "resource_limits_memory_bytes" , "spec_replicas", "replicas_current", "replicas_ready"] + taginclude = ["nodeName", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name"] [[inputs.exec]] ## Commands array interval = "15m" diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index e35bb1cd0..a9b27993c 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -30,6 +30,8 @@ Computer = "placeholder_hostname" ControllerType = "$CONTROLLER_TYPE" + nodeName = "placeholder_hostname" + # Configuration for telegraf agent [agent] @@ -82,9 +84,9 @@ logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" ## Override default hostname, if empty use os.Hostname() - hostname = "placeholder_hostname" + #hostname = "placeholder_hostname" ## If set to true, do no set the "host" tag in the telegraf agent. 
- omit_hostname = false + omit_hostname = true ############################################################################### @@ -144,6 +146,7 @@ # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" namepass = ["filestat", "telegraf_telemetry"] + tagDrop = ["nodeName"] ############################################################################### # PROCESSOR PLUGINS # @@ -315,13 +318,34 @@ [[processors.rename.replace]] field = "rx_errors" dest = "podNetworkRxErrorsTotal" + [[processors.rename.replace]] + tag = "volume_name" + dest = "volumeName" + [[processors.rename.replace]] + tag = "pod_name" + dest = "podName" [[processors.rename.replace]] measurement = "docker" dest = "containers" [[processors.rename.replace]] measurement = "docker_container_status" dest = "containers" - + [[processors.rename.replace]] + field = "n_containers" + dest = "numContainers" + [[processors.rename.replace]] + field = "n_containers_running" + dest = "numContainersRunning" + [[processors.rename.replace]] + field = "n_containers_stopped" + dest = "numContainersStopped" + [[processors.rename.replace]] + field = "n_containers_paused" + dest = "numContainersPaused" + [[processors.rename.replace]] + field = "n_images" + dest = "numContainerImages" + # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] # # tag = "method" @@ -498,7 +522,7 @@ ## Ignore mount points by filesystem type. 
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "overlay", "aufs", "squashfs"] fieldpass = ["free", "used", "used_percent"] - taginclude = ["device","path","host"] + taginclude = ["device","path","nodeName"] # Read metrics about memory usage @@ -522,7 +546,7 @@ ## #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] fieldpass = ["err_in", "err_out"] - taginclude = ["interface","host"] + taginclude = ["interface","nodeName"] # Read metrics from the kubernetes kubelet api [[inputs.kubernetes]] @@ -543,7 +567,7 @@ ## Use TLS but skip chain & host verification # insecure_skip_verify = false fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] - taginclude = ["volume_name","host","namespace","pod_name"] + taginclude = ["volume_name","nodeName","namespace","pod_name"] # Read metrics about docker containers [[inputs.docker]] ## Docker Endpoint @@ -590,7 +614,8 @@ ## Use TLS but skip chain & host verification # insecure_skip_verify = false fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] - taginclude = ["host", "container_name", "container_status", "container_image"] + #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] + taginclude = ["nodeName"] [[inputs.filestat]] ## Files to gather stats about. 
interval = "15m" diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index e7a3323aa..e6e6401d2 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -92,6 +92,7 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit-rs.conf; installer/conf/td-agent-bit-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root +/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf; installer/conf/out_oms-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/custom_metrics_regions.conf; installer/conf/custom_metrics_regions.conf; 644; root; root diff --git a/installer/scripts/Telegraf403Telemetry.sh b/installer/scripts/Telegraf403Telemetry.sh index 3754538c6..f4476d9fd 100644 --- a/installer/scripts/Telegraf403Telemetry.sh +++ b/installer/scripts/Telegraf403Telemetry.sh @@ -1,3 +1,3 @@ #!/bin/sh -count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l) -echo "telegraf,AKS_RESOURCE_ID=$AKS_RESOURCE_ID, 403count=$count403" \ No newline at end of file +count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') +echo "telegraf,AKS_RESOURCE_ID=$AKS_RESOURCE_ID, 403countTotal=$count403" \ No newline at end of file diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 5d9269d1e..bf14ac5e6 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -26,7 +26,8 @@ import ( 
const DataType = "CONTAINER_LOG_BLOB" // ContainerLogPluginConfFilePath --> config file path for container log plugin -const ContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" +const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" +const ReplicaSetSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" // IPName for Container Log const IPName = "Containers" diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 7747fc7bb..059a93fe9 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -22,7 +22,13 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") agentVersion := os.Getenv("AGENT_VERSION") - InitializePlugin(ContainerLogPluginConfFilePath, agentVersion) + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "replicaset") == 0) { + Log("Using %s for plugin config \n", ReplicaSetContainerLogPluginConfFilePath) + InitializePlugin(ReplicaSetContainerLogPluginConfFilePath, agentVersion) + } else { + Log("Using %s for plugin config \n", DaemonSetContainerLogPluginConfFilePath) + InitializePlugin(DaemonSetContainerLogPluginConfFilePath, agentVersion) + } enableTelemetry := output.FLBPluginConfigKey(ctx, "EnableTelemetry") if strings.Compare(strings.ToLower(enableTelemetry), "true") == 0 { telemetryPushInterval := output.FLBPluginConfigKey(ctx, "TelemetryPushIntervalSeconds") From 50d8572391adff44e19edc266ae3cd08fb15c788 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 12:37:01 -0800 Subject: [PATCH 25/38] fix statefulsets --- installer/conf/telegraf-rs.conf | 5 ++++- installer/conf/telegraf.conf | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index 
b01f380e2..ece3152ff 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -356,6 +356,9 @@ [[processors.rename.replace]] tag = "container_name" dest = "containerName" + [[processors.rename.replace]] + tag = "statefulset_name" + dest = "statefulsetName" [[processors.rename.replace]] tag = "node_name" dest = "nodeName" @@ -602,7 +605,7 @@ namepass = ["kubernetes_daemonset", "kubernetes_deployment", "kubernetes_node", "kubernetes_pod_container", "kubernetes_statefulset"] fieldpass = ["current_number_scheduled", "desired_number_scheduled", "number_available", "number_unavailable", "number_ready", "replicas_available", "replicas_unavailable", "capacity_cpu_cores", "capacity_memory_bytes", "capacity_pods", "allocatable_pods", "allocatable_cpu_cores", "allocatable_memory_bytes", "restarts_total","resource_requests_cpu_units", "resource_requests_memory_bytes", "resource_limits_cpu_units", "resource_limits_memory_bytes" , "spec_replicas", "replicas_current", "replicas_ready"] - taginclude = ["nodeName", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name"] + taginclude = ["nodeName", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name","statefulset_name"] [[inputs.exec]] ## Commands array interval = "15m" diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index a9b27993c..4fd57c1b5 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -330,19 +330,19 @@ [[processors.rename.replace]] measurement = "docker_container_status" dest = "containers" - [[processors.rename.replace]] + [[processors.rename.replace]] field = "n_containers" dest = "numContainers" - [[processors.rename.replace]] + [[processors.rename.replace]] field = "n_containers_running" dest = "numContainersRunning" - [[processors.rename.replace]] + [[processors.rename.replace]] field = "n_containers_stopped" dest = "numContainersStopped" - 
[[processors.rename.replace]] + [[processors.rename.replace]] field = "n_containers_paused" dest = "numContainersPaused" - [[processors.rename.replace]] + [[processors.rename.replace]] field = "n_images" dest = "numContainerImages" From 2f8f4bfc7f8a55656f6e2c7cead3290143f3f4e8 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 12:46:53 -0800 Subject: [PATCH 26/38] fix typo. --- source/code/go/src/plugins/out_oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 059a93fe9..90ecaf15a 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -22,7 +22,7 @@ func FLBPluginRegister(ctx unsafe.Pointer) int { func FLBPluginInit(ctx unsafe.Pointer) int { Log("Initializing out_oms go plugin for fluentbit") agentVersion := os.Getenv("AGENT_VERSION") - if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "replicaset") == 0) { + if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "replicaset") == 0 { Log("Using %s for plugin config \n", ReplicaSetContainerLogPluginConfFilePath) InitializePlugin(ReplicaSetContainerLogPluginConfFilePath, agentVersion) } else { From dd12b3d77f9d9a2b061867ee0fc9fa7ff6d7d0b1 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 12:50:25 -0800 Subject: [PATCH 27/38] fix another typo. 
--- source/code/go/src/plugins/oms.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index bf14ac5e6..d58a33b55 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -27,7 +27,7 @@ const DataType = "CONTAINER_LOG_BLOB" // ContainerLogPluginConfFilePath --> config file path for container log plugin const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" -const ReplicaSetSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" +const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" // IPName for Container Log const IPName = "Containers" From 49c251fa9b5571ae4945b87f5a8a3f17744f41c4 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 13:07:48 -0800 Subject: [PATCH 28/38] fix telemetry --- installer/scripts/Telegraf403Telemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/scripts/Telegraf403Telemetry.sh b/installer/scripts/Telegraf403Telemetry.sh index f4476d9fd..cfa996dda 100644 --- a/installer/scripts/Telegraf403Telemetry.sh +++ b/installer/scripts/Telegraf403Telemetry.sh @@ -1,3 +1,3 @@ #!/bin/sh count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') -echo "telegraf,AKS_RESOURCE_ID=$AKS_RESOURCE_ID, 403countTotal=$count403" \ No newline at end of file +echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID}, 403countTotal=${count403}i" \ No newline at end of file From 2a3ef70cf26dacbf347ceb353d9d0ab05002c751 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 13:31:22 -0800 Subject: [PATCH 29/38] fix casing issue --- installer/conf/telegraf.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/telegraf.conf 
b/installer/conf/telegraf.conf index 4fd57c1b5..99b271ac4 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -146,7 +146,7 @@ # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" namepass = ["filestat", "telegraf_telemetry"] - tagDrop = ["nodeName"] + tagdrop = ["nodeName"] ############################################################################### # PROCESSOR PLUGINS # From 63426d235b6afbeeae8ba1c09d0dbf364851ee88 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 14:31:51 -0800 Subject: [PATCH 30/38] fix comma issue. --- installer/scripts/Telegraf403Telemetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/scripts/Telegraf403Telemetry.sh b/installer/scripts/Telegraf403Telemetry.sh index cfa996dda..3022ceaf0 100644 --- a/installer/scripts/Telegraf403Telemetry.sh +++ b/installer/scripts/Telegraf403Telemetry.sh @@ -1,3 +1,3 @@ #!/bin/sh count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') -echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID}, 403countTotal=${count403}i" \ No newline at end of file +echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} 403countTotal=${count403}i" \ No newline at end of file From 6063f7964b6fd5ee8782dd41da4d2fb4de89eb0e Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Sun, 10 Feb 2019 17:28:59 -0800 Subject: [PATCH 31/38] disable telemetry for rs ; fix stateful set name --- installer/conf/td-agent-bit-rs.conf | 2 +- installer/conf/telegraf-rs.conf | 2 +- source/code/go/src/plugins/telemetry.go | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf index 19239708b..03d97657e 100644 --- a/installer/conf/td-agent-bit-rs.conf +++ b/installer/conf/td-agent-bit-rs.conf @@ -20,6 +20,6 @@ [OUTPUT] Name oms - 
EnableTelemetry true + EnableTelemetry false TelemetryPushIntervalSeconds 300 Match oms.container.log.* \ No newline at end of file diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index ece3152ff..b749b5ad9 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -276,7 +276,7 @@ measurement = "kubernetes_deployment" dest = "deployments" [[processors.rename.replace]] - measurement = "kubernetes_deployment" + measurement = "kubernetes_statefulset" dest = "statefulsets" [[processors.rename.replace]] measurement = "kubernetes_node" diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index acf97042b..bbc7be5eb 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -33,8 +33,6 @@ var ( const ( clusterTypeACS = "ACS" clusterTypeAKS = "AKS" - controllerTypeDaemonSet = "DaemonSet" - controllerTypeReplicaSet = "ReplicaSet" envAKSResourceID = "AKS_RESOURCE_ID" envACSResourceName = "ACS_RESOURCE_NAME" envAppInsightsAuth = "APPLICATIONINSIGHTS_AUTH" @@ -125,7 +123,7 @@ func InitializeTelemetryClient(agentVersion string) (int, error) { CommonProperties = make(map[string]string) CommonProperties["Computer"] = Computer CommonProperties["WorkspaceID"] = WorkspaceID - CommonProperties["ControllerType"] = controllerTypeDaemonSet + CommonProperties["ControllerType"] = os.Getenv("CONTROLLER_TYPE") CommonProperties["AgentVersion"] = agentVersion aksResourceID := os.Getenv(envAKSResourceID) From c8965463a06a7efd044d6e2b1b095e596da12c30 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Mon, 11 Feb 2019 17:04:53 -0800 Subject: [PATCH 32/38] workaround for namespace fix --- installer/conf/telegraf-rs.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index b749b5ad9..fea446345 100644 --- a/installer/conf/telegraf-rs.conf +++ 
b/installer/conf/telegraf-rs.conf @@ -576,7 +576,7 @@ url = "$K8SSERVICEHOST" ## Namespace to use - # namespace = "default" + namespace = "" ## Use bearer token for authorization. ('bearer_token' takes priority) bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" From a1991ce8ef27994f354b94954523852e4c6634aa Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Fri, 15 Mar 2019 18:00:50 -0700 Subject: [PATCH 33/38] telegraf integration - v1 --- installer/conf/td-agent-bit-rs.conf | 3 +- installer/conf/td-agent-bit.conf | 23 +++- installer/conf/telegraf.conf | 49 ++++++-- source/code/go/src/plugins/oms.go | 166 ++++++++++++++++++++++++++ source/code/go/src/plugins/out_oms.go | 2 + 5 files changed, 231 insertions(+), 12 deletions(-) diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf index 03d97657e..7993e7528 100644 --- a/installer/conf/td-agent-bit-rs.conf +++ b/installer/conf/td-agent-bit-rs.conf @@ -9,9 +9,10 @@ Tag oms.container.log.telegraf.err.* Path /var/opt/microsoft/docker-cimprov/log/telegraf.log DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db - Mem_Buf_Limit 30m + Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On + Ignore_Older 1h [FILTER] Name grep diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 9771a4c96..8f2e5b5cd 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -10,27 +10,38 @@ Path /var/log/containers/*.log DB /var/log/omsagent-fblogs.db Parser docker - Mem_Buf_Limit 30m + Mem_Buf_Limit 5m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m [INPUT] Name tail Tag oms.container.log.flbplugin.* Path /var/log/containers/omsagent*.log DB /var/opt/microsoft/docker-cimprov/state/omsagent-ai.db - Mem_Buf_Limit 30m + Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m [INPUT] Name tail Tag oms.container.log.telegraf.err.* Path /var/opt/microsoft/docker-cimprov/log/telegraf.log DB 
/var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db - Mem_Buf_Limit 30m + Mem_Buf_Limit 2m Path_Key filepath Skip_Long_Lines On + Ignore_Older 5m + +[INPUT] + Name tcp + Tag oms.container.perf.telegraf.* + Listen 0.0.0.0 + Port 25226 + Chunk_Size 32 + Buffer_Size 64 [FILTER] Name grep @@ -41,4 +52,8 @@ Name oms EnableTelemetry true TelemetryPushIntervalSeconds 300 - Match oms.container.log.* \ No newline at end of file + Match oms.container.* + +#[OUTPUT] +# Name file +# Match oms.container.perf.telegraf.* \ No newline at end of file diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 99b271ac4..ccd53bc03 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -77,9 +77,9 @@ ## Logging configuration: ## Run telegraf with debug log messages. - debug = false + debug = true ## Run telegraf in quiet mode (error log messages only). - quiet = true + quiet = false ## Specify the log file name. The empty string means to log to stderr. logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" @@ -94,29 +94,29 @@ ############################################################################### # Send aggregate metrics to Azure Monitor -[[outputs.azure_monitor]] +#[[outputs.azure_monitor]] ## Timeout for HTTP writes. # timeout = "20s" ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Insights.Container/" +# namespace_prefix = "Insights.Container/" ## Azure Monitor doesn't have a string value type, so convert string ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows ## a maximum of 10 dimensions so Telegraf will only send the first 10 ## alphanumeric dimensions. - strings_as_dimensions = true +# strings_as_dimensions = true ## Both region and resource_id must be set or be available via the ## Instance Metadata service on Azure Virtual Machines. # ## Azure Region to publish metrics against. 
## ex: region = "southcentralus" - region = "placeholder_region" +# region = "placeholder_region" # ## The Azure Resource ID against which metric will be logged, e.g. #resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" - resource_id = "placeholder_resource_id" +# resource_id = "placeholder_resource_id" #azure_tenant_id = "placeholder_azure_tenant_id" @@ -125,6 +125,41 @@ #azure_client_secret = "placeholder_azure_client_secret" #namepass = ["nodes", "pods", "containers","prometheus"] +# namedrop = ["filestat", "telegraf_telemetry"] +# tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] + +# Generic socket writer capable of handling multiple socket types. +[[outputs.socket_writer]] + ## URL to connect to + address = "tcp://0.0.0.0:25226" + # address = "tcp://example.com:http" + # address = "tcp4://127.0.0.1:8094" + # address = "tcp6://127.0.0.1:8094" + # address = "tcp6://[2001:db8::1]:8094" + # address = "udp://127.0.0.1:8094" + # address = "udp4://127.0.0.1:8094" + # address = "udp6://127.0.0.1:8094" + # address = "unix:///tmp/telegraf.sock" + # address = "unixgram:///tmp/telegraf.sock" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false + + ## Period between keep alive probes. + ## Only applies to TCP sockets. + ## 0 disables keep alive probes. + ## Defaults to the OS configuration. + # keep_alive_period = "5m" + + ## Data format to generate. 
+ ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "json" namedrop = ["filestat", "telegraf_telemetry"] tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index d58a33b55..c97da3963 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -25,6 +25,11 @@ import ( // DataType for Container Log const DataType = "CONTAINER_LOG_BLOB" +//env variable which has ResourceId for LA +const ResourceIdEnv = "AKS_RESOURCE_ID" + +const CustomLogsAPIVersion = "api-version=2016-04-01" + // ContainerLogPluginConfFilePath --> config file path for container log plugin const DaemonSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms.conf" const ReplicaSetContainerLogPluginConfFilePath = "/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf" @@ -41,6 +46,8 @@ var ( HTTPClient http.Client // OMSEndpoint ingestion endpoint OMSEndpoint string + // Custom log ingestion endpoint for OMS + OMSCustomLogsEndpoint string // Computer (Hostname) when ingesting into ContainerLog table Computer string // WorkspaceID log analytics workspace id @@ -60,6 +67,8 @@ var ( ContainerLogTelemetryMutex = &sync.Mutex{} // ClientSet for querying KubeAPIs ClientSet *kubernetes.Clientset + //ResourceId for LA + ResourceId string ) var ( @@ -89,6 +98,18 @@ type DataItem struct { Computer string `json:"Computer"` } +// telegraf metric DataItem represents the object corresponding to the json that is sent by fluentbit tail plugin +type laTelegrafMetric struct { + Namespace string `json:"Namespace"` + Name string `json:"Name"` + Source string `json:"Source"` + TimeStamp string `json:"TimeStamp"` + Tags string `json:"Tags"` + Value float64 `json:"Value"` + ResourceId string 
`json:"ResourceId"` + MetricType string `json:"MetricType"` +} + // ContainerLogBlob represents the object corresponding to the payload that is sent to the ODS end point type ContainerLogBlob struct { DataType string `json:"DataType"` @@ -201,6 +222,147 @@ func updateKubeSystemContainerIDs() { } } +//Azure loganalytics metric values have to be numeric, so string values are dropped +func convert(in interface{}) (float64, bool) { + Log ("got %v", in) + switch v := in.(type) { + case int64: + return float64(v), true + case uint64: + return float64(v), true + case float64: + return v, true + case bool: + if v { + return float64(1), true + } + return float64(0), true + default: + Log ("returning 0 for %v ", in) + return float64(0), false + } +} + +//Translates telegraf time series to 1 or more Azure loganalytics metric +func translate(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { + + var laMetrics []*laTelegrafMetric + var tags map[interface{}]interface{} + tags = m["tags"].(map[interface{}]interface{}) + tagMap := make(map[string]string) + for k, v := range tags { + key := fmt.Sprintf("%s",k) + if key == "" { + continue + } + tagMap[key] = fmt.Sprintf("%s",v) + } + + var fieldMap map[interface{}]interface{} + fieldMap = m["fields"].(map[interface{}]interface{}) + + var metricType string = "unknown" + + tagJson, _ := json.Marshal(&tagMap) + + for k, v := range fieldMap { + fv, ok := convert(v) + if !ok { + continue + } + i := m["timestamp"].(uint64) + laMetric := laTelegrafMetric{ + Name: fmt.Sprintf("%s",k), + Namespace: fmt.Sprintf("%s",m["name"]), + Source: "telegraf", + TimeStamp: time.Unix(int64(i),0).Format(time.RFC3339), + Tags: fmt.Sprintf("%s", tagJson), + Value: fv, + ResourceId: ResourceId, + MetricType: metricType, + } + + //Log ("la metric:%v", laMetric) + laMetrics = append(laMetrics, &laMetric) + } + return laMetrics, nil +} + +//send metrics from Telegraf to LA +func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) 
int { + var laMetrics []*laTelegrafMetric + for _, record := range telegrafRecords { + //Log ("mymetric:%s", record) + translatedMetrics, err := translate(record) + if err != nil { + Log("PostTelegrafMetricsToLA::Error when translating telegraf metric to log analytics metric %q", err) + } + laMetrics = append(laMetrics, translatedMetrics...) + } + + jsonBytes, err := json.Marshal(&laMetrics) + if err != nil { + Log("PostTelegrafMetricsToLA::Error when marshalling json %q", err) + //SendException(message) + return output.FLB_OK + } + + Log ("got %s metrics", len(laMetrics)) + + //start + req, _ := http.NewRequest("POST", OMSCustomLogsEndpoint, bytes.NewBuffer(jsonBytes)) + + //req.URL.Query().Add("api-version","2016-04-01") + + req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Log-Type", "telegrafMetricsV1"); + req.Header.Set("time-generated-field", "timestamp"); + req.Header.Set("x-ms-AzureResourceId", ResourceId) + + start := time.Now() + resp, err := HTTPClient.Do(req) + elapsed := time.Since(start) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error when sending request %s \n", err.Error()) + Log(message) + //SendException(message) + Log("PostTelegrafMetricsToLA::Failed to flush %d records after %s", len(laMetrics), elapsed) + + return output.FLB_RETRY + } + + if resp == nil || resp.StatusCode != 200 { + if resp != nil { + Log("PostTelegrafMetricsToLA::Response Status %s Status Code %d", resp.Status, resp.StatusCode) + } + return output.FLB_RETRY + } + + defer resp.Body.Close() + + numRecords := len(laMetrics) + Log("PostTelegrafMetricsToLA::Successfully flushed %d records in %s", numRecords, elapsed) + //ContainerLogTelemetryMutex.Lock() + //FlushedRecordsCount += float64(numRecords) + //FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) + + //if maxLatency >= AgentLogProcessingMaxLatencyMs { + // AgentLogProcessingMaxLatencyMs = maxLatency + 
// AgentLogProcessingMaxLatencyMsContainer = maxLatencyContainer + //} + + //ContainerLogTelemetryMutex.Unlock() +//} + + return output.FLB_OK + + + //end + +} + // PostDataHelper sends data to the OMS endpoint func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { @@ -317,6 +479,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { return output.FLB_RETRY } + defer resp.Body.Close() + numRecords := len(dataItems) Log("Successfully flushed %d records in %s", numRecords, elapsed) ContainerLogTelemetryMutex.Lock() @@ -358,6 +522,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { IgnoreIDSet = make(map[string]bool) ImageIDMap = make(map[string]string) NameIDMap = make(map[string]string) + ResourceId = os.Getenv(ResourceIdEnv) pluginConfig, err := ReadConfiguration(pluginConfPath) if err != nil { @@ -377,6 +542,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) { log.Fatalln(message) } OMSEndpoint = omsadminConf["OMS_ENDPOINT"] + OMSCustomLogsEndpoint = OMSEndpoint + "?" 
+ CustomLogsAPIVersion WorkspaceID = omsadminConf["WORKSPACE_ID"] Log("OMSEndpoint %s", OMSEndpoint) diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go index 90ecaf15a..dccc6774c 100644 --- a/source/code/go/src/plugins/out_oms.go +++ b/source/code/go/src/plugins/out_oms.go @@ -62,6 +62,8 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int { incomingTag := strings.ToLower(C.GoString(tag)) if strings.Contains(incomingTag, "oms.container.log.flbplugin") { return PushToAppInsightsTraces(records, appinsights.Information, incomingTag) + } else if strings.Contains(incomingTag, "oms.container.perf.telegraf") { + return PostTelegrafMetricsToLA(records) } else if strings.Contains(incomingTag, "oms.container.log.telegraf.err") { return PushToAppInsightsTraces(records, appinsights.Error, incomingTag) } From 1c4c714e553cfe1658210dda47048945cc05465e Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Tue, 19 Mar 2019 18:00:35 -0700 Subject: [PATCH 34/38] telemetry changes for telegraf --- installer/conf/telegraf-rs.conf | 10 +-- installer/conf/telegraf.conf | 10 +-- installer/scripts/Telegraf403Telemetry.sh | 4 +- source/code/go/src/plugins/oms.go | 85 +++++++++++++---------- source/code/go/src/plugins/telemetry.go | 15 +++- 5 files changed, 67 insertions(+), 57 deletions(-) diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf index fea446345..fd430e6b9 100644 --- a/installer/conf/telegraf-rs.conf +++ b/installer/conf/telegraf-rs.conf @@ -142,7 +142,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["filestat", "telegraf_telemetry"] + namepass = ["telegraf_telemetry"] ############################################################################### # PROCESSOR PLUGINS # @@ -562,14 +562,6 @@ ## Use TLS but skip chain & host verification # insecure_skip_verify = true # 
Read stats about given file(s) -[[inputs.filestat]] - ## Files to gather stats about. - interval = "15m" - ## These accept standard unix glob matching rules, but with the addition of - ## ** as a "super asterisk". See https://github.com/gobwas/glob. - files = ["/var/opt/microsoft/docker-cimprov/log/telegraf.log"] - ## If true, read the entire file and calculate an md5 checksum. - md5 = false [[inputs.kube_inventory]] ## URL for the Kubernetes API #url = "https://127.0.0.1" diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index ccd53bc03..dd86a50a9 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -180,7 +180,7 @@ # [outputs.application_insights.context_tag_sources] # "ai.cloud.role" = "kubernetes_container_name" # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["filestat", "telegraf_telemetry"] + namepass = ["telegraf_telemetry"] tagdrop = ["nodeName"] ############################################################################### @@ -651,14 +651,6 @@ fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] taginclude = ["nodeName"] -[[inputs.filestat]] - ## Files to gather stats about. - interval = "15m" - ## These accept standard unix glob matching rules, but with the addition of - ## ** as a "super asterisk". See https://github.com/gobwas/glob. - files = ["/var/opt/microsoft/docker-cimprov/log/telegraf.log"] - ## If true, read the entire file and calculate an md5 checksum. 
- md5 = false [[inputs.exec]] ## Commands array interval = "15m" diff --git a/installer/scripts/Telegraf403Telemetry.sh b/installer/scripts/Telegraf403Telemetry.sh index 3022ceaf0..c64369798 100644 --- a/installer/scripts/Telegraf403Telemetry.sh +++ b/installer/scripts/Telegraf403Telemetry.sh @@ -1,3 +1,3 @@ #!/bin/sh -count403=$(grep -iF "[azure_monitor]: failed to write batch: [403] 403 Forbidden" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') -echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} 403countTotal=${count403}i" \ No newline at end of file +countErr=$(grep -iF "Error writing to output [socket_writer]" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') +echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index c97da3963..a18a9b37a 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -224,7 +224,6 @@ func updateKubeSystemContainerIDs() { //Azure loganalytics metric values have to be numeric, so string values are dropped func convert(in interface{}) (float64, bool) { - Log ("got %v", in) switch v := in.(type) { case int64: return float64(v), true @@ -244,7 +243,7 @@ func convert(in interface{}) (float64, bool) { } //Translates telegraf time series to 1 or more Azure loganalytics metric -func translate(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { +func translateTelegrafMetrics(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { var laMetrics []*laTelegrafMetric var tags map[interface{}]interface{} @@ -263,7 +262,11 @@ func translate(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { var metricType string = "unknown" - tagJson, _ := json.Marshal(&tagMap) + tagJson, err := json.Marshal(&tagMap) + + if err != nil { + return nil, err + } for k, v := range fieldMap { fv, ok := convert(v) @@ 
-288,32 +291,49 @@ func translate(m map[interface{}]interface{}) ([]*laTelegrafMetric, error) { return laMetrics, nil } -//send metrics from Telegraf to LA +//send metrics from Telegraf to LA. 1) Translate telegraf timeseries to LA metric(s) 2) Send it to LA as 'ContainerMetrics' fixed type func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int { var laMetrics []*laTelegrafMetric + + if ( (telegrafRecords== nil) || ! (len(telegrafRecords) > 0) ) { + Log("PostTelegrafMetricsToLA::Error:no timeseries to derive") + return output.FLB_OK + } + for _, record := range telegrafRecords { - //Log ("mymetric:%s", record) - translatedMetrics, err := translate(record) + translatedMetrics, err := translateTelegrafMetrics(record) if err != nil { - Log("PostTelegrafMetricsToLA::Error when translating telegraf metric to log analytics metric %q", err) + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when translating telegraf metric to log analytics metric %q", err) + Log(message) + //SendException(message) //This will be too noisy } laMetrics = append(laMetrics, translatedMetrics...) 
} - jsonBytes, err := json.Marshal(&laMetrics) - if err != nil { - Log("PostTelegrafMetricsToLA::Error when marshalling json %q", err) - //SendException(message) + if ( (laMetrics == nil) || !(len(laMetrics) > 0) ) { + Log("PostTelegrafMetricsToLA::Info:no metrics derived from timeseries data") return output.FLB_OK + } else { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Info:derived %v metrics from %v timeseries", len(laMetrics), len(telegrafRecords)) + Log(message) } - Log ("got %s metrics", len(laMetrics)) - //start - req, _ := http.NewRequest("POST", OMSCustomLogsEndpoint, bytes.NewBuffer(jsonBytes)) + jsonBytes, err := json.Marshal(&laMetrics) + + if err != nil { + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:when marshalling json %q", err) + Log(message) + SendException(message) + return output.FLB_OK + } + //Post metrics data to LA + req, _ := http.NewRequest("POST", OMSCustomLogsEndpoint, bytes.NewBuffer(jsonBytes)) + //req.URL.Query().Add("api-version","2016-04-01") - + + //set headers req.Header.Set("x-ms-date", time.Now().Format(time.RFC3339)) req.Header.Set("Content-Type", "application/json") req.Header.Set("Log-Type", "telegrafMetricsV1"); @@ -325,42 +345,35 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int elapsed := time.Since(start) if err != nil { - message := fmt.Sprintf("PostTelegrafMetricsToLA::Error when sending request %s \n", err.Error()) + message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. 
duration:%v err:%q \n", len(laMetrics), elapsed, err.Error()) Log(message) - //SendException(message) - Log("PostTelegrafMetricsToLA::Failed to flush %d records after %s", len(laMetrics), elapsed) - + SendException(message) + UpdateNumTelegrafMetricsSentTelemetry(0, 1) return output.FLB_RETRY } if resp == nil || resp.StatusCode != 200 { if resp != nil { - Log("PostTelegrafMetricsToLA::Response Status %s Status Code %d", resp.Status, resp.StatusCode) + Log("PostTelegrafMetricsToLA::Error:(retriable) Response Status %v Status Code %v", resp.Status, resp.StatusCode) } + UpdateNumTelegrafMetricsSentTelemetry(0, 1) return output.FLB_RETRY } defer resp.Body.Close() - numRecords := len(laMetrics) - Log("PostTelegrafMetricsToLA::Successfully flushed %d records in %s", numRecords, elapsed) - //ContainerLogTelemetryMutex.Lock() - //FlushedRecordsCount += float64(numRecords) - //FlushedRecordsTimeTaken += float64(elapsed / time.Millisecond) - - //if maxLatency >= AgentLogProcessingMaxLatencyMs { - // AgentLogProcessingMaxLatencyMs = maxLatency - // AgentLogProcessingMaxLatencyMsContainer = maxLatencyContainer - //} - - //ContainerLogTelemetryMutex.Unlock() -//} + numMetrics := len(laMetrics) + UpdateNumTelegrafMetricsSentTelemetry(numMetrics, 0) + Log("PostTelegrafMetricsToLA::Info:Successfully flushed %v records in %v", numMetrics, elapsed) return output.FLB_OK +} - - //end - +func UpdateNumTelegrafMetricsSentTelemetry(numMetricsSent int, numSendErrors int) { + ContainerLogTelemetryMutex.Lock() + TelegrafMetricsSentCount += float64(numMetricsSent) + TelegrafMetricsSendErrorCount += float64(numSendErrors) + ContainerLogTelemetryMutex.Unlock() } // PostDataHelper sends data to the OMS endpoint diff --git a/source/code/go/src/plugins/telemetry.go b/source/code/go/src/plugins/telemetry.go index bbc7be5eb..370fb63e9 100644 --- a/source/code/go/src/plugins/telemetry.go +++ b/source/code/go/src/plugins/telemetry.go @@ -14,7 +14,7 @@ import ( ) var ( - // FlushedRecordsCount 
indicates the number of flushed records in the current period + // FlushedRecordsCount indicates the number of flushed log records in the current period FlushedRecordsCount float64 // FlushedRecordsTimeTaken indicates the cumulative time taken to flush the records for the current period FlushedRecordsTimeTaken float64 @@ -28,6 +28,10 @@ var ( TelemetryClient appinsights.TelemetryClient // ContainerLogTelemetryTicker sends telemetry periodically ContainerLogTelemetryTicker *time.Ticker + //Tracks the number of telegraf metrics sent successfully between telemetry ticker periods (uses ContainerLogTelemetryTicker) + TelegrafMetricsSentCount float64 + //Tracks the number of send errors between telemetry ticker periods (uses ContainerLogTelemetryTicker) + TelegrafMetricsSendErrorCount float64 ) const ( @@ -39,6 +43,8 @@ const ( metricNameAvgFlushRate = "ContainerLogAvgRecordsFlushedPerSec" metricNameAvgLogGenerationRate = "ContainerLogsGeneratedPerSec" metricNameAgentLogProcessingMaxLatencyMs = "ContainerLogsAgentSideLatencyMs" + metricNameNumberofTelegrafMetricsSentSuccessfully = "TelegrafMetricsSentCount" + metricNameNumberofSendErrorsTelegrafMetrics = "TelegrafMetricsSendErrorCount" defaultTelemetryPushIntervalSeconds = 300 @@ -62,9 +68,14 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { for ; true; <-ContainerLogTelemetryTicker.C { SendEvent(eventNameDaemonSetHeartbeat, make(map[string]string)) elapsed := time.Since(start) + ContainerLogTelemetryMutex.Lock() flushRate := FlushedRecordsCount / FlushedRecordsTimeTaken * 1000 logRate := FlushedRecordsCount / float64(elapsed/time.Second) + telegrafMetricsSentCount := TelegrafMetricsSentCount + telegrafMetricsSendErrorCount := TelegrafMetricsSendErrorCount + TelegrafMetricsSentCount = 0.0 + TelegrafMetricsSendErrorCount = 0.0 FlushedRecordsCount = 0.0 FlushedRecordsTimeTaken = 0.0 logLatencyMs := AgentLogProcessingMaxLatencyMs @@ -80,6 +91,8 @@ func 
SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) { logLatencyMetric := appinsights.NewMetricTelemetry(metricNameAgentLogProcessingMaxLatencyMs, logLatencyMs) logLatencyMetric.Properties["Container"] = logLatencyMsContainer TelemetryClient.Track(logLatencyMetric) + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofTelegrafMetricsSentSuccessfully, telegrafMetricsSentCount)) + TelemetryClient.Track(appinsights.NewMetricTelemetry(metricNameNumberofSendErrorsTelegrafMetrics, telegrafMetricsSendErrorCount)) start = time.Now() } } From f1325de25dd3cebdc4cb5d2a2e81fcc77a1f76cd Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 21 Mar 2019 16:38:51 -0700 Subject: [PATCH 35/38] telemetry & other changes --- installer/conf/telegraf-rs.conf | 618 ------------------ installer/conf/telegraf.conf | 348 +++------- installer/datafiles/base_container.data | 3 +- installer/scripts/Telegraf403Telemetry.sh | 3 - .../scripts/TelegrafTCPErrorTelemetry.sh | 3 + 5 files changed, 99 insertions(+), 876 deletions(-) delete mode 100644 installer/conf/telegraf-rs.conf delete mode 100644 installer/scripts/Telegraf403Telemetry.sh create mode 100644 installer/scripts/TelegrafTCPErrorTelemetry.sh diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf deleted file mode 100644 index fd430e6b9..000000000 --- a/installer/conf/telegraf-rs.conf +++ /dev/null @@ -1,618 +0,0 @@ -# Telegraf Configuration -# -# Telegraf is entirely plugin driven. All metrics are gathered from the -# declared inputs, and sent to the declared outputs. -# -# Plugins must be declared in here to be active. -# To deactivate a plugin, comment out the name and any variables. -# -# Use 'telegraf -config telegraf.conf -test' to see what metrics a config -# file would generate. -# -# Environment variables can be used anywhere in this config file, simply prepend -# them with $. 
For strings the variable must be within quotes (ie, "$STR_VAR"), -# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) - - -# Global tags can be specified here in key="value" format. -[global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - # user = "$USER" - # cluster = "$ACS_RESOURCE_NAME" - #node = $NODE_IP - AgentVersion = "$AGENT_VERSION" - AKS_RESOURCE_ID = "$AKS_RESOURCE_ID" - Region = "$AKS_REGION" - ClusterName = "$AKS_CLUSTER_NAME" - ClusterType = "AKS" - Computer = "placeholder_hostname" - ControllerType = "$CONTROLLER_TYPE" - -# Configuration for telegraf agent -[agent] - ## Default data collection interval for all inputs - interval = "60s" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. - round_interval = true - - ## Telegraf will send metrics to outputs in batches of at most - ## metric_batch_size metrics. - ## This controls the size of writes that Telegraf sends to output plugins. - metric_batch_size = 1000 - - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). - metric_buffer_limit = 10000 - - ## Collection jitter is used to jitter the collection by a random amount. - ## Each plugin will sleep for a random time within jitter before collecting. - ## This can be used to avoid many plugins querying things like sysfs at the - ## same time, which can have a measurable effect on the system. - collection_jitter = "0s" - - ## Default flushing interval for all outputs. You shouldn't set this below - ## interval. 
Maximum flush_interval will be flush_interval + flush_jitter - flush_interval = "10s" - ## Jitter the flush interval by a random amount. This is primarily to avoid - ## large write spikes for users running a large number of telegraf instances. - ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s - flush_jitter = "0s" - - ## By default or when set to "0s", precision will be set to the same - ## timestamp order as the collection interval, with the maximum being 1s. - ## ie, when interval = "10s", precision will be "1s" - ## when interval = "250ms", precision will be "1ms" - ## Precision will NOT be used for service inputs. It is up to each individual - ## service input to set the timestamp at the appropriate precision. - ## Valid time units are "ns", "us" (or "µs"), "ms", "s". - precision = "" - - ## Logging configuration: - ## Run telegraf with debug log messages. - debug = false - ## Run telegraf in quiet mode (error log messages only). - quiet = true - ## Specify the log file name. The empty string means to log to stderr. - logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" - - ## Override default hostname, if empty use os.Hostname() - #hostname = "placeholder_hostname" - ## If set to true, do no set the "host" tag in the telegraf agent. - omit_hostname = true - - -############################################################################### -# OUTPUT PLUGINS # -############################################################################### - -# Send aggregate metrics to Azure Monitor -[[outputs.azure_monitor]] - ## Timeout for HTTP writes. - # timeout = "20s" - - ## Set the namespace prefix, defaults to "Telegraf/". - namespace_prefix = "Insights.Container/" - - ## Azure Monitor doesn't have a string value type, so convert string - ## fields to dimensions (a.k.a. tags) if enabled. Azure Monitor allows - ## a maximum of 10 dimensions so Telegraf will only send the first 10 - ## alphanumeric dimensions. 
- strings_as_dimensions = true - - ## Both region and resource_id must be set or be available via the - ## Instance Metadata service on Azure Virtual Machines. - # - ## Azure Region to publish metrics against. - ## ex: region = "southcentralus" - region = "placeholder_region" - # - ## The Azure Resource ID against which metric will be logged, e.g. - #resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" - resource_id = "placeholder_resource_id" - - #azure_tenant_id = "placeholder_azure_tenant_id" - - #azure_client_id = "placeholder_azure_client_id" - - #azure_client_secret = "placeholder_azure_client_secret" - - #namepass = ["nodes", "pods", "containers","prometheus"] - namedrop = ["filestat", "telegraf_telemetry"] - tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] - -[[outputs.application_insights]] - ## Instrumentation key of the Application Insights resource. - instrumentation_key = "$APPLICATIONINSIGHTS_KEY" - - ## Timeout for closing (default: 5s). - # timeout = "5s" - - ## Enable additional diagnostic logging. - # enable_diagnostic_logging = false - - ## Context Tag Sources add Application Insights context tags to a tag value. 
- ## - ## For list of allowed context tag keys see: - ## https://github.com/Microsoft/ApplicationInsights-Go/blob/master/appinsights/contracts/contexttagkeys.go - # [outputs.application_insights.context_tag_sources] - # "ai.cloud.role" = "kubernetes_container_name" - # "ai.cloud.roleInstance" = "kubernetes_pod_name" - namepass = ["telegraf_telemetry"] - -############################################################################### -# PROCESSOR PLUGINS # -############################################################################### - -# # Convert values to another metric value type -# [[processors.converter]] -# ## Tags to convert -# ## -# ## The table key determines the target type, and the array of key-values -# ## select the keys to convert. The array may contain globs. -# ## = [...] -# [processors.converter.tags] -# string = ["device"] -# integer = [] -# unsigned = [] -# boolean = [] -# float = [] -# -# ## Fields to convert -# ## -# ## The table key determines the target type, and the array of key-values -# ## select the keys to convert. The array may contain globs. -# ## = [...] -# [processors.converter.fields] -# tag = ["host"] -# string = [] -# integer = [] -# unsigned = [] -# boolean = [] -# float = [] - - -# # Map enum values according to given table. -# [[processors.enum]] -# [[processors.enum.mapping]] -# ## Name of the field to map -# field = "status" -# -# ## Destination field to be used for the mapped value. By default the source -# ## field is used, overwriting the original value. -# # dest = "status_code" -# -# ## Default value to be used for all values not contained in the mapping -# ## table. When unset, the unmodified value for the field will be used if no -# ## match is found. -# # default = 0 -# -# ## Table of mappings -# [processors.enum.mapping.value_mappings] -# green = 1 -# yellow = 2 -# red = 3 - - -# # Apply metric modifications using override semantics. 
-# [[processors.override]] -# ## All modifications on inputs and aggregators can be overridden: -# # name_override = "new_name" -# # name_prefix = "new_name_prefix" -# # name_suffix = "new_name_suffix" -# -# ## Tags to be added (all values must be strings) -# # [processors.override.tags] -# # additional_tag = "tag_value" - - -# # Parse a value in a specified field/tag(s) and add the result in a new metric -# [[processors.parser]] -# ## The name of the fields whose value will be parsed. -# parse_fields = [] -# -# ## If true, incoming metrics are not emitted. -# drop_original = false -# -# ## If set to override, emitted metrics will be merged by overriding the -# ## original metric using the newly parsed metrics. -# merge = "override" -# -# ## The dataformat to be read from files -# ## Each data format has its own unique set of configuration options, read -# ## more about them here: -# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -# data_format = "influx" - - -# # Print all metrics that pass through this filter. 
-# [[processors.printer]] - - -# # Transforms tag and field values with regex pattern -# [[processors.regex]] -# ## Tag and field conversions defined in a separate sub-tables -# # [[processors.regex.tags]] -# # ## Tag to change -# # key = "resp_code" -# # ## Regular expression to match on a tag value -# # pattern = "^(\\d)\\d\\d$" -# # ## Pattern for constructing a new value (${1} represents first subgroup) -# # replacement = "${1}xx" -# -# # [[processors.regex.fields]] -# # key = "request" -# # ## All the power of the Go regular expressions available here -# # ## For example, named subgroups -# # pattern = "^/api(?P/[\\w/]+)\\S*" -# # replacement = "${method}" -# # ## If result_key is present, a new field will be created -# # ## instead of changing existing field -# # result_key = "method" -# -# ## Multiple conversions may be applied for one field sequentially -# ## Let's extract one more value -# # [[processors.regex.fields]] -# # key = "request" -# # pattern = ".*category=(\\w+).*" -# # replacement = "${1}" -# # result_key = "search_category" - - -# # Rename measurements, tags, and fields that pass through this filter. 
-# [[processors.rename]] - - -# # Perform string processing on tags, fields, and measurements -[[processors.rename]] - [[processors.rename.replace]] - measurement = "kubernetes_daemonset" - dest = "daemonsets" - [[processors.rename.replace]] - measurement = "kubernetes_deployment" - dest = "deployments" - [[processors.rename.replace]] - measurement = "kubernetes_statefulset" - dest = "statefulsets" - [[processors.rename.replace]] - measurement = "kubernetes_node" - dest = "nodes" - [[processors.rename.replace]] - measurement = "kubernetes_pod_container" - dest = "containers" - [[processors.rename.replace]] - field = "current_number_scheduled" - dest = "currentNumberScheduled" - [[processors.rename.replace]] - field = "desired_number_scheduled" - dest = "desiredNumberScheduled" - [[processors.rename.replace]] - field = "number_available" - dest = "numberAvailable" - [[processors.rename.replace]] - field = "number_unavailable" - dest = "numUnavailable" - [[processors.rename.replace]] - field = "number_ready" - dest = "numReady" - [[processors.rename.replace]] - field = "replicas_available" - dest = "numReplicasAvailable" - [[processors.rename.replace]] - field = "replicas_unavailable" - dest = "numReplicasUnavailable" - [[processors.rename.replace]] - field = "capacity_cpu_cores" - dest = "capacityCpuCores" - [[processors.rename.replace]] - field = "capacity_memory_bytes" - dest = "capacityMemoryBytes" - [[processors.rename.replace]] - field = "capacity_pods" - dest = "capacityNumPods" - [[processors.rename.replace]] - field = "allocatable_pods" - dest = "allocatableNumPods" - [[processors.rename.replace]] - field = "allocatable_cpu_cores" - dest = "allocatableCpuCores" - [[processors.rename.replace]] - field = "allocatable_memory_bytes" - dest = "allocatableMemoryBytes" - [[processors.rename.replace]] - field = "restarts_total" - dest = "restartsTotal" - [[processors.rename.replace]] - field = "resource_requests_cpu_units" - dest = "resourceRequestsCpuUnits" - 
[[processors.rename.replace]] - field = "resource_requests_memory_bytes" - dest = "resourceRequestsMemoryBytes" - [[processors.rename.replace]] - field = "resource_limits_cpu_units" - dest = "resourceLimitsCpuUnits" - [[processors.rename.replace]] - field = "resource_limits_memory_bytes" - dest = "resourceLimitsMemoryBytes" - [[processors.rename.replace]] - field = "spec_replicas" - dest = "numSpecReplicas" - [[processors.rename.replace]] - field = "replicas_current" - dest = "numCurrentReplicas" - [[processors.rename.replace]] - field = "replicas_ready" - dest = "numReadyReplicas" - [[processors.rename.replace]] - tag = "daemonset_name" - dest = "daemonsetName" - [[processors.rename.replace]] - tag = "deployment_name" - dest = "deploymentName" - [[processors.rename.replace]] - tag = "container_name" - dest = "containerName" - [[processors.rename.replace]] - tag = "statefulset_name" - dest = "statefulsetName" - [[processors.rename.replace]] - tag = "node_name" - dest = "nodeName" - -# ## Convert a tag value to uppercase -# # [[processors.strings.uppercase]] -# # tag = "method" -# -# ## Convert a field value to lowercase and store in a new field -# # [[processors.strings.lowercase]] -# # field = "uri_stem" -# # dest = "uri_stem_normalised" -# -# ## Trim leading and trailing whitespace using the default cutset -# # [[processors.strings.trim]] -# # field = "message" -# -# ## Trim leading characters in cutset -# # [[processors.strings.trim_left]] -# # field = "message" -# # cutset = "\t" -# -# ## Trim trailing characters in cutset -# # [[processors.strings.trim_right]] -# # field = "message" -# # cutset = "\r\n" -# -# ## Trim the given prefix from the field -# # [[processors.strings.trim_prefix]] -# # field = "my_value" -# # prefix = "my_" -# -# ## Trim the given suffix from the field -# # [[processors.strings.trim_suffix]] -# # field = "read_count" -# # suffix = "_count" - - -# # Print all metrics that pass through this filter. 
-# [[processors.topk]] -# ## How many seconds between aggregations -# # period = 10 -# -# ## How many top metrics to return -# # k = 10 -# -# ## Over which tags should the aggregation be done. Globs can be specified, in -# ## which case any tag matching the glob will aggregated over. If set to an -# ## empty list is no aggregation over tags is done -# # group_by = ['*'] -# -# ## Over which fields are the top k are calculated -# # fields = ["value"] -# -# ## What aggregation to use. Options: sum, mean, min, max -# # aggregation = "mean" -# -# ## Instead of the top k largest metrics, return the bottom k lowest metrics -# # bottomk = false -# -# ## The plugin assigns each metric a GroupBy tag generated from its name and -# ## tags. If this setting is different than "" the plugin will add a -# ## tag (which name will be the value of this setting) to each metric with -# ## the value of the calculated GroupBy tag. Useful for debugging -# # add_groupby_tag = "" -# -# ## These settings provide a way to know the position of each metric in -# ## the top k. The 'add_rank_field' setting allows to specify for which -# ## fields the position is required. If the list is non empty, then a field -# ## will be added to each and every metric for each string present in this -# ## setting. This field will contain the ranking of the group that -# ## the metric belonged to when aggregated over that field. -# ## The name of the field will be set to the name of the aggregation field, -# ## suffixed with the string '_topk_rank' -# # add_rank_fields = [] -# -# ## These settings provide a way to know what values the plugin is generating -# ## when aggregating metrics. The 'add_agregate_field' setting allows to -# ## specify for which fields the final aggregation value is required. If the -# ## list is non empty, then a field will be added to each every metric for -# ## each field present in this setting. 
This field will contain -# ## the computed aggregation for the group that the metric belonged to when -# ## aggregated over that field. -# ## The name of the field will be set to the name of the aggregation field, -# ## suffixed with the string '_topk_aggregate' -# # add_aggregate_fields = [] - - - -############################################################################### -# AGGREGATOR PLUGINS # -############################################################################### - -# # Keep the aggregate basicstats of each metric passing through. -# [[aggregators.basicstats]] -# ## General Aggregator Arguments: -# ## The period on which to flush & clear the aggregator. -# period = "30s" -# ## If true, the original metric will be dropped by the -# ## aggregator and will not get sent to the output plugins. -# drop_original = false - - -# # Create aggregate histograms. -# [[aggregators.histogram]] -# ## The period in which to flush the aggregator. -# period = "30s" -# -# ## If true, the original metric will be dropped by the -# ## aggregator and will not get sent to the output plugins. -# drop_original = false -# -# ## Example config that aggregates all fields of the metric. -# # [[aggregators.histogram.config]] -# # ## The set of buckets. -# # buckets = [0.0, 15.6, 34.5, 49.1, 71.5, 80.5, 94.5, 100.0] -# # ## The name of metric. -# # measurement_name = "cpu" -# -# ## Example config that aggregates only specific fields of the metric. -# # [[aggregators.histogram.config]] -# # ## The set of buckets. -# # buckets = [0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] -# # ## The name of metric. -# # measurement_name = "diskio" -# # ## The concrete fields of metric -# # fields = ["io_time", "read_time", "write_time"] - - -# # Keep the aggregate min/max of each metric passing through. -# [[aggregators.minmax]] -# ## General Aggregator Arguments: -# ## The period on which to flush & clear the aggregator. 
-# period = "30s" -# ## If true, the original metric will be dropped by the -# ## aggregator and will not get sent to the output plugins. -# drop_original = false - - -# # Count the occurance of values in fields. -# [[aggregators.valuecounter]] -# ## General Aggregator Arguments: -# ## The period on which to flush & clear the aggregator. -# period = "30s" -# ## If true, the original metric will be dropped by the -# ## aggregator and will not get sent to the output plugins. -# drop_original = false -# ## The fields for which the values will be counted -# fields = [] - - - -############################################################################### -# INPUT PLUGINS # -############################################################################### - -# Read metrics about cpu usage -#[[inputs.cpu]] - ## Whether to report per-cpu stats or not -# percpu = false - ## Whether to report total system cpu stats or not -# totalcpu = true - ## If true, collect raw CPU time metrics. -# collect_cpu_time = false - ## If true, compute and report the sum of all non-idle CPU states. -# report_active = true -# fieldpass = ["usage_active","cluster","node","host","device"] -# taginclude = ["cluster","cpu","node"] - - - - # Read metrics from one or many prometheus clients -#[[inputs.prometheus]] - ## An array of urls to scrape metrics from. -# urls = ["https://$METRICS_SERVER_SERVICE_HOST/metrics"] - - ## An array of Kubernetes services to scrape metrics from. - # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"] - - ## Kubernetes config file to create client from. - # kube_config = "/path/to/kubernetes.config" - - ## Scrape Kubernetes pods for the following prometheus annotations: - ## - prometheus.io/scrape: Enable scraping for this pod - ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to - ## set this to 'https' & most likely set the tls config. 
- ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. - ## - prometheus.io/port: If port is not 9102 use this annotation - # monitor_kubernetes_pods = true - - ## Use bearer token for authorization -# bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" - - ## Specify timeout duration for slower prometheus clients (default is 3s) -# response_timeout = "15s" - - ## Optional TLS Config -# tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - # tls_cert = /path/to/certfile - # tls_key = /path/to/keyfile - ## Use TLS but skip chain & host verification -# insecure_skip_verify = true - # Read stats about given file(s) -[[inputs.kube_inventory]] - ## URL for the Kubernetes API - #url = "https://127.0.0.1" - url = "$K8SSERVICEHOST" - - ## Namespace to use - namespace = "" - - ## Use bearer token for authorization. ('bearer_token' takes priority) - bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" - ## OR - # bearer_token_string = "abc_123" - - ## Set response_timeout (default 5 seconds) - response_timeout = "15s" - - ## Optional Resources to exclude from gathering - ## Leave them with blank with try to gather everything available. - ## Values can be - "daemonsets", deployments", "nodes", "persistentvolumes", - ## "persistentvolumeclaims", "pods", "statefulsets" - # resource_exclude = [ "deployments", "nodes", "statefulsets" ] - - ## Optional Resources to include when gathering - ## Overrides resource_exclude if both set. 
- # resource_include = [ "deployments", "nodes", "statefulsets" ] - - ## Optional TLS Config - tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - # tls_cert = "/path/to/certfile" - # tls_key = "/path/to/keyfile" - ## Use TLS but skip chain & host verification - insecure_skip_verify = true - - namepass = ["kubernetes_daemonset", "kubernetes_deployment", "kubernetes_node", "kubernetes_pod_container", "kubernetes_statefulset"] - fieldpass = ["current_number_scheduled", "desired_number_scheduled", "number_available", "number_unavailable", "number_ready", "replicas_available", "replicas_unavailable", "capacity_cpu_cores", "capacity_memory_bytes", "capacity_pods", "allocatable_pods", "allocatable_cpu_cores", "allocatable_memory_bytes", "restarts_total","resource_requests_cpu_units", "resource_requests_memory_bytes", "resource_limits_cpu_units", "resource_limits_memory_bytes" , "spec_replicas", "replicas_current", "replicas_ready"] - taginclude = ["nodeName", "daemonset_name", "namespace", "deployment_name", "container_name", "namespace", "node_name","statefulset_name"] -[[inputs.exec]] - ## Commands array - interval = "15m" - commands = [ - "/opt/microsoft/docker-cimprov/bin/Telegraf403Telemetry.sh" - ] - - ## Timeout for each command to complete. - timeout = "15s" - - ## measurement name suffix (for separating different commands) - name_suffix = "_telemetry" - - ## Data format to consume. - ## Each data format has its own unique set of configuration options, read - ## more about them here: - ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md - data_format = "influx" \ No newline at end of file diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index dd86a50a9..70f74093c 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -16,17 +16,13 @@ # Global tags can be specified here in key="value" format. 
[global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - # user = "$USER" - # cluster = "$ACS_RESOURCE_NAME" - #node = $NODE_IP + #Below are entirely used for telemetry AgentVersion = "$AGENT_VERSION" - AKS_RESOURCE_ID = "$AKS_RESOURCE_ID" - Region = "$AKS_REGION" - ClusterName = "$AKS_CLUSTER_NAME" - ClusterType = "AKS" + AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID" + ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME" + Region = "$TELEMETRY_AKS_REGION" + ClusterName = "$TELEMETRY_CLUSTER_NAME" + ClusterType = "$TELEMETRY_CLUSTER_TYPE" Computer = "placeholder_hostname" ControllerType = "$CONTROLLER_TYPE" @@ -60,7 +56,7 @@ ## Default flushing interval for all outputs. You shouldn't set this below ## interval. Maximum flush_interval will be flush_interval + flush_jitter - flush_interval = "10s" + flush_interval = "60s" ## Jitter the flush interval by a random amount. This is primarily to avoid ## large write spikes for users running a large number of telegraf instances. ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s @@ -77,9 +73,9 @@ ## Logging configuration: ## Run telegraf with debug log messages. - debug = true + debug = false ## Run telegraf in quiet mode (error log messages only). - quiet = false + quiet = true ## Specify the log file name. The empty string means to log to stderr. logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log" @@ -93,41 +89,6 @@ # OUTPUT PLUGINS # ############################################################################### -# Send aggregate metrics to Azure Monitor -#[[outputs.azure_monitor]] - ## Timeout for HTTP writes. - # timeout = "20s" - - ## Set the namespace prefix, defaults to "Telegraf/". -# namespace_prefix = "Insights.Container/" - - ## Azure Monitor doesn't have a string value type, so convert string - ## fields to dimensions (a.k.a. tags) if enabled. 
Azure Monitor allows - ## a maximum of 10 dimensions so Telegraf will only send the first 10 - ## alphanumeric dimensions. -# strings_as_dimensions = true - - ## Both region and resource_id must be set or be available via the - ## Instance Metadata service on Azure Virtual Machines. - # - ## Azure Region to publish metrics against. - ## ex: region = "southcentralus" -# region = "placeholder_region" - # - ## The Azure Resource ID against which metric will be logged, e.g. - #resource_id = "/subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/" -# resource_id = "placeholder_resource_id" - - #azure_tenant_id = "placeholder_azure_tenant_id" - - #azure_client_id = "placeholder_azure_client_id" - - #azure_client_secret = "placeholder_azure_client_secret" - - #namepass = ["nodes", "pods", "containers","prometheus"] -# namedrop = ["filestat", "telegraf_telemetry"] -# tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] - # Generic socket writer capable of handling multiple socket types. [[outputs.socket_writer]] ## URL to connect to @@ -160,12 +121,12 @@ ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "json" - namedrop = ["filestat", "telegraf_telemetry"] - tagdrop = ["AgentVersion","AKS_RESOURCE_ID","Region","ClusterName","ClusterType", "Computer", "ControllerType"] + namedrop = ["telegraf_telemetry"] + tagdrop = ["AgentVersion","AKS_RESOURCE_ID", "ACS_RESOURCE_NAME", "Region","ClusterName","ClusterType", "Computer", "ControllerType"] [[outputs.application_insights]] ## Instrumentation key of the Application Insights resource. - instrumentation_key = "$APPLICATIONINSIGHTS_KEY" + instrumentation_key = "$TELEMETRY_APPLICATIONINSIGHTS_KEY" ## Timeout for closing (default: 5s). 
# timeout = "5s" @@ -187,125 +148,6 @@ # PROCESSOR PLUGINS # ############################################################################### -# # Convert values to another metric value type -# [[processors.converter]] -# ## Tags to convert -# ## -# ## The table key determines the target type, and the array of key-values -# ## select the keys to convert. The array may contain globs. -# ## = [...] -# [processors.converter.tags] -# string = ["device"] -# integer = [] -# unsigned = [] -# boolean = [] -# float = [] -# -# ## Fields to convert -# ## -# ## The table key determines the target type, and the array of key-values -# ## select the keys to convert. The array may contain globs. -# ## = [...] -# [processors.converter.fields] -# tag = ["host"] -# string = [] -# integer = [] -# unsigned = [] -# boolean = [] -# float = [] - - -# # Map enum values according to given table. -# [[processors.enum]] -# [[processors.enum.mapping]] -# ## Name of the field to map -# field = "status" -# -# ## Destination field to be used for the mapped value. By default the source -# ## field is used, overwriting the original value. -# # dest = "status_code" -# -# ## Default value to be used for all values not contained in the mapping -# ## table. When unset, the unmodified value for the field will be used if no -# ## match is found. -# # default = 0 -# -# ## Table of mappings -# [processors.enum.mapping.value_mappings] -# green = 1 -# yellow = 2 -# red = 3 - - -# # Apply metric modifications using override semantics. 
-# [[processors.override]] -# ## All modifications on inputs and aggregators can be overridden: -# # name_override = "new_name" -# # name_prefix = "new_name_prefix" -# # name_suffix = "new_name_suffix" -# -# ## Tags to be added (all values must be strings) -# # [processors.override.tags] -# # additional_tag = "tag_value" - - -# # Parse a value in a specified field/tag(s) and add the result in a new metric -# [[processors.parser]] -# ## The name of the fields whose value will be parsed. -# parse_fields = [] -# -# ## If true, incoming metrics are not emitted. -# drop_original = false -# -# ## If set to override, emitted metrics will be merged by overriding the -# ## original metric using the newly parsed metrics. -# merge = "override" -# -# ## The dataformat to be read from files -# ## Each data format has its own unique set of configuration options, read -# ## more about them here: -# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -# data_format = "influx" - - -# # Print all metrics that pass through this filter. 
-# [[processors.printer]] - - -# # Transforms tag and field values with regex pattern -# [[processors.regex]] -# ## Tag and field conversions defined in a separate sub-tables -# # [[processors.regex.tags]] -# # ## Tag to change -# # key = "resp_code" -# # ## Regular expression to match on a tag value -# # pattern = "^(\\d)\\d\\d$" -# # ## Pattern for constructing a new value (${1} represents first subgroup) -# # replacement = "${1}xx" -# -# # [[processors.regex.fields]] -# # key = "request" -# # ## All the power of the Go regular expressions available here -# # ## For example, named subgroups -# # pattern = "^/api(?P/[\\w/]+)\\S*" -# # replacement = "${method}" -# # ## If result_key is present, a new field will be created -# # ## instead of changing existing field -# # result_key = "method" -# -# ## Multiple conversions may be applied for one field sequentially -# ## Let's extract one more value -# # [[processors.regex.fields]] -# # key = "request" -# # pattern = ".*category=(\\w+).*" -# # replacement = "${1}" -# # result_key = "search_category" - - -# # Rename measurements, tags, and fields that pass through this filter. 
-# [[processors.rename]] - - # # Perform string processing on tags, fields, and measurements [[processors.rename]] [[processors.rename.replace]] @@ -320,66 +162,66 @@ [[processors.rename.replace]] field = "used_percent" dest = "diskUsedPercentage" - [[processors.rename.replace]] - measurement = "net" - dest = "nodes" - [[processors.rename.replace]] - field = "bytes_recv" - dest = "networkBytesReceivedTotal" - [[processors.rename.replace]] - field = "bytes_sent" - dest = "networkBytesSentTotal" - [[processors.rename.replace]] - field = "err_in" - dest = "networkErrorsInTotal" - [[processors.rename.replace]] - field = "err_out" - dest = "networkErrorsOutTotal" - [[processors.rename.replace]] - measurement = "kubernetes_pod_volume" - dest = "pods" - [[processors.rename.replace]] - field = "used_bytes" - dest = "podVolumeUsedBytes" - [[processors.rename.replace]] - field = "available_bytes" - dest = "podVolumeAvailableBytes" - [[processors.rename.replace]] - measurement = "kubernetes_pod_network" - dest = "pods" - [[processors.rename.replace]] - field = "tx_errors" - dest = "podNetworkTxErrorsTotal" - [[processors.rename.replace]] - field = "rx_errors" - dest = "podNetworkRxErrorsTotal" - [[processors.rename.replace]] - tag = "volume_name" - dest = "volumeName" - [[processors.rename.replace]] - tag = "pod_name" - dest = "podName" - [[processors.rename.replace]] - measurement = "docker" - dest = "containers" - [[processors.rename.replace]] - measurement = "docker_container_status" - dest = "containers" - [[processors.rename.replace]] - field = "n_containers" - dest = "numContainers" - [[processors.rename.replace]] - field = "n_containers_running" - dest = "numContainersRunning" - [[processors.rename.replace]] - field = "n_containers_stopped" - dest = "numContainersStopped" - [[processors.rename.replace]] - field = "n_containers_paused" - dest = "numContainersPaused" - [[processors.rename.replace]] - field = "n_images" - dest = "numContainerImages" + 
#[[processors.rename.replace]] + # measurement = "net" + # dest = "nodes" + #[[processors.rename.replace]] + # field = "bytes_recv" + # dest = "networkBytesReceivedTotal" + #[[processors.rename.replace]] + # field = "bytes_sent" + # dest = "networkBytesSentTotal" + #[[processors.rename.replace]] + # field = "err_in" + # dest = "networkErrorsInTotal" + #[[processors.rename.replace]] + # field = "err_out" + # dest = "networkErrorsOutTotal" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_volume" + # dest = "pods" + #[[processors.rename.replace]] + # field = "used_bytes" + # dest = "podVolumeUsedBytes" + #[[processors.rename.replace]] + # field = "available_bytes" + # dest = "podVolumeAvailableBytes" + #[[processors.rename.replace]] + # measurement = "kubernetes_pod_network" + # dest = "pods" + #[[processors.rename.replace]] + # field = "tx_errors" + # dest = "podNetworkTxErrorsTotal" + #[[processors.rename.replace]] + # field = "rx_errors" + # dest = "podNetworkRxErrorsTotal" + #[[processors.rename.replace]] + # tag = "volume_name" + # dest = "volumeName" + #[[processors.rename.replace]] + # tag = "pod_name" + # dest = "podName" + #[[processors.rename.replace]] + # measurement = "docker" + # dest = "containers" + #[[processors.rename.replace]] + # measurement = "docker_container_status" + # dest = "containers" + #[[processors.rename.replace]] + # field = "n_containers" + # dest = "numContainers" + #[[processors.rename.replace]] + # field = "n_containers_running" + # dest = "numContainersRunning" + #[[processors.rename.replace]] + # field = "n_containers_stopped" + # dest = "numContainersStopped" + #[[processors.rename.replace]] + # field = "n_containers_paused" + # dest = "numContainersPaused" + #[[processors.rename.replace]] + # field = "n_images" + # dest = "numContainerImages" # ## Convert a tag value to uppercase # # [[processors.strings.uppercase]] @@ -567,7 +409,7 @@ # Read metrics about network interface usage -[[inputs.net]] +#[[inputs.net]] 
## By default, telegraf gathers stats from any up interface (excluding loopback) ## Setting interfaces will tell it to gather these explicit interfaces, ## regardless of status. @@ -577,17 +419,17 @@ ## On linux systems telegraf also collects protocol stats. ## Setting ignore_protocol_stats to true will skip reporting of protocol metrics. ## - ignore_protocol_stats = true +# ignore_protocol_stats = true ## #fieldpass = ["bytes_recv", "bytes_sent", "err_in", "err_out"] - fieldpass = ["err_in", "err_out"] - taginclude = ["interface","nodeName"] + #fieldpass = ["err_in", "err_out"] + #taginclude = ["interface","nodeName"] # Read metrics from the kubernetes kubelet api -[[inputs.kubernetes]] +#[[inputs.kubernetes]] ## URL for the kubelet #url = "http://1.1.1.1:10255" - url = "http://placeholder_nodeip:10255" +# url = "http://placeholder_nodeip:10255" ## Use bearer token for authorization # bearer_token = /path/to/bearer/token @@ -601,46 +443,46 @@ # tls_key = /path/to/keyfile ## Use TLS but skip chain & host verification # insecure_skip_verify = false - fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] - taginclude = ["volume_name","nodeName","namespace","pod_name"] +# fieldpass = ["used_bytes", "available_bytes", "tx_errors", "rx_errors" ] +# taginclude = ["volume_name","nodeName","namespace","pod_name"] # Read metrics about docker containers -[[inputs.docker]] +#[[inputs.docker]] ## Docker Endpoint ## To use TCP, set endpoint = "tcp://[ip]:[port]" ## To use environment variables (ie, docker-machine), set endpoint = "ENV" - endpoint = "unix:///var/run/host/docker.sock" +# endpoint = "unix:///var/run/host/docker.sock" ## Set to true to collect Swarm metrics(desired_replicas, running_replicas) - gather_services = false +# gather_services = false ## Only collect metrics for these containers, collect all if empty - container_names = [] +# container_names = [] ## Containers to include and exclude. Globs accepted. 
## Note that an empty array for both will include all containers - container_name_include = [] - container_name_exclude = [] +# container_name_include = [] +# container_name_exclude = [] ## Container states to include and exclude. Globs accepted. ## When empty only containers in the "running" state will be captured. - container_state_include = ['*'] +# container_state_include = ['*'] # container_state_exclude = [] ## Timeout for docker list, info, and stats commands - timeout = "5s" +# timeout = "5s" ## Whether to report for each container per-device blkio (8:0, 8:1...) and ## network (eth0, eth1, ...) stats or not - perdevice = true +# perdevice = true ## Whether to report for each container total blkio and network stats or not - total = true +# total = true ## Which environment variables should we use as a tag ##tag_env = ["JAVA_HOME", "HEAP_SIZE"] ## docker labels to include and exclude as tags. Globs accepted. ## Note that an empty array for both will include all labels as tags - docker_label_include = [] - docker_label_exclude = [] +# docker_label_include = [] +# docker_label_exclude = [] ## Optional TLS Config # tls_ca = "/etc/telegraf/ca.pem" @@ -648,14 +490,14 @@ # tls_key = "/etc/telegraf/key.pem" ## Use TLS but skip chain & host verification # insecure_skip_verify = false - fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] +# fieldpass = ["n_containers", "n_containers_running", "n_containers_stopped", "n_containers_paused", "n_images"] #fieldpass = ["numContainers", "numContainersRunning", "numContainersStopped", "numContainersPaused", "numContainerImages"] - taginclude = ["nodeName"] +# taginclude = ["nodeName"] [[inputs.exec]] ## Commands array interval = "15m" commands = [ - "/opt/microsoft/docker-cimprov/bin/Telegraf403Telemetry.sh" + "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh" ] ## Timeout for each command to complete. 
diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index e6e6401d2..b11a0f2e4 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -94,9 +94,8 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms-rs.conf; installer/conf/out_oms-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root -/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf; installer/conf/telegraf-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/custom_metrics_regions.conf; installer/conf/custom_metrics_regions.conf; 644; root; root -/opt/microsoft/docker-cimprov/bin/Telegraf403Telemetry.sh; installer/scripts/Telegraf403Telemetry.sh; 755; root; root +/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root %Links /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root diff --git a/installer/scripts/Telegraf403Telemetry.sh b/installer/scripts/Telegraf403Telemetry.sh deleted file mode 100644 index c64369798..000000000 --- a/installer/scripts/Telegraf403Telemetry.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -countErr=$(grep -iF "Error writing to output [socket_writer]" /var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') -echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file diff --git a/installer/scripts/TelegrafTCPErrorTelemetry.sh b/installer/scripts/TelegrafTCPErrorTelemetry.sh new file mode 100644 index 000000000..ee8bf74a1 --- /dev/null +++ b/installer/scripts/TelegrafTCPErrorTelemetry.sh @@ -0,0 +1,3 @@ +#!/bin/sh +countErr=$(grep -iF "[socket_writer]" 
/var/opt/microsoft/docker-cimprov/log/telegraf.log | wc -l | tr -d '\n') +echo "telegraf,AKS_RESOURCE_ID=${AKS_RESOURCE_ID} telegrafTCPWriteErrorCountTotal=${countErr}i" \ No newline at end of file From 574077850e5178b738d616699052bc60d3161e03 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 21 Mar 2019 16:54:38 -0700 Subject: [PATCH 36/38] remove custom metric regions as we dont need anymore --- installer/conf/custom_metrics_regions.conf | 7 ------- installer/conf/td-agent-bit.conf | 2 +- installer/datafiles/base_container.data | 1 - 3 files changed, 1 insertion(+), 9 deletions(-) delete mode 100644 installer/conf/custom_metrics_regions.conf diff --git a/installer/conf/custom_metrics_regions.conf b/installer/conf/custom_metrics_regions.conf deleted file mode 100644 index bf548abdd..000000000 --- a/installer/conf/custom_metrics_regions.conf +++ /dev/null @@ -1,7 +0,0 @@ -eastus -southcentralus -westcentralus -westus2 -southeastasia -northeurope -westeurope \ No newline at end of file diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf index 8f2e5b5cd..2d87fe136 100644 --- a/installer/conf/td-agent-bit.conf +++ b/installer/conf/td-agent-bit.conf @@ -46,7 +46,7 @@ [FILTER] Name grep Match oms.container.log.telegraf.err.* - Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ + #Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ [OUTPUT] Name oms diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index b11a0f2e4..89d63047a 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -94,7 +94,6 @@ MAINTAINER: 'Microsoft Corporation' /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms-rs.conf; installer/conf/out_oms-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; 
installer/conf/telegraf.conf; 644; root; root -/etc/opt/microsoft/docker-cimprov/custom_metrics_regions.conf; installer/conf/custom_metrics_regions.conf; 644; root; root /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root %Links From 9d534d0dfdc18c2a3917c22741ee90b8729e629a Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 21 Mar 2019 16:58:20 -0700 Subject: [PATCH 37/38] remove un-needed files --- installer/conf/td-agent-bit-rs.conf | 26 ------------------------- installer/datafiles/base_container.data | 1 - 2 files changed, 27 deletions(-) delete mode 100644 installer/conf/td-agent-bit-rs.conf diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf deleted file mode 100644 index 7993e7528..000000000 --- a/installer/conf/td-agent-bit-rs.conf +++ /dev/null @@ -1,26 +0,0 @@ -[SERVICE] - Flush 30 - Log_Level info - Parsers_File /etc/td-agent-bit/parsers.conf - Log_File /var/opt/microsoft/docker-cimprov/log/fluent-bit.log - -[INPUT] - Name tail - Tag oms.container.log.telegraf.err.* - Path /var/opt/microsoft/docker-cimprov/log/telegraf.log - DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db - Mem_Buf_Limit 2m - Path_Key filepath - Skip_Long_Lines On - Ignore_Older 1h - -[FILTER] - Name grep - Match oms.container.log.telegraf.err.* - Regex log /^(?:(?!\[azure_monitor\]: failed to write batch: \[403\] 403 Forbidden).)*$/ - -[OUTPUT] - Name oms - EnableTelemetry false - TelemetryPushIntervalSeconds 300 - Match oms.container.log.* \ No newline at end of file diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 6e0cdde22..4ee32e580 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -96,7 +96,6 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root 
/etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root -/etc/opt/microsoft/docker-cimprov/td-agent-bit-rs.conf; installer/conf/td-agent-bit-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms-rs.conf; installer/conf/out_oms-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root From bfb7331adf764ceeb054d8113bcf85699f2a8c52 Mon Sep 17 00:00:00 2001 From: Vishwanath Narasimhan Date: Thu, 21 Mar 2019 16:59:31 -0700 Subject: [PATCH 38/38] fixes --- installer/conf/out_oms-rs.conf | 6 ------ installer/datafiles/base_container.data | 1 - 2 files changed, 7 deletions(-) delete mode 100644 installer/conf/out_oms-rs.conf diff --git a/installer/conf/out_oms-rs.conf b/installer/conf/out_oms-rs.conf deleted file mode 100644 index e3a32a526..000000000 --- a/installer/conf/out_oms-rs.conf +++ /dev/null @@ -1,6 +0,0 @@ -omsadmin_conf_path=/etc/opt/microsoft/omsagent/conf/omsadmin.conf -cert_file_path=/etc/opt/microsoft/omsagent/certs/oms.crt -key_file_path=/etc/opt/microsoft/omsagent/certs/oms.key -container_host_file_path=/var/opt/microsoft/docker-cimprov/state/containerhostname -container_inventory_refresh_interval=86400 -kube_system_containers_refresh_interval=86400 diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data index 4ee32e580..2d6fd7b01 100644 --- a/installer/datafiles/base_container.data +++ b/installer/datafiles/base_container.data @@ -97,7 +97,6 @@ MAINTAINER: 'Microsoft Corporation' /opt/td-agent-bit/bin/out_oms.so; intermediate/${{BUILD_CONFIGURATION}}/out_oms.so; 755; root; root /etc/opt/microsoft/docker-cimprov/td-agent-bit.conf; installer/conf/td-agent-bit.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/out_oms.conf; installer/conf/out_oms.conf; 644; root; root 
-/etc/opt/microsoft/docker-cimprov/out_oms-rs.conf; installer/conf/out_oms-rs.conf; 644; root; root /etc/opt/microsoft/docker-cimprov/telegraf.conf; installer/conf/telegraf.conf; 644; root; root /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root