From ea28d08a1566e9666b83ac0724e82e4f619f2629 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Tue, 18 Jun 2019 17:02:30 -0700
Subject: [PATCH 01/12] hard code config for UST CCP team

---
 installer/conf/telegraf-rs.conf | 38 ++++++++++++++++++++++++++++++++
 installer/conf/telegraf.conf    | 39 +++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index cb9a36685..d7e6fd16c 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -535,6 +535,44 @@
 #  insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 #  [inputs.prometheus.tagpass]
+[[inputs.prometheus]]
+  #name_prefix="container.azm.ms/"
+  ## An array of urls to scrape metrics from.
+  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## An array of Kubernetes services to scrape metrics from.
+  kubernetes_services = ["http://prometheus-operated.monitoring:9090/metrics", "https://kube-state-metrics.monitoring:9443"]
+
+  ## Kubernetes config file to create client from.
+  # kube_config = "/path/to/kubernetes.config"
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
+  # monitor_kubernetes_pods = true
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+  ## OR
+  # bearer_token_string = "abc_123"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  #tls_cert = /path/to/certfile
+  # tls_key = /path/to/keyfile
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
+  #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 
 [[inputs.exec]]
   ## Commands array
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 06b1c55eb..185fea5be 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -568,6 +568,45 @@
   insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 
+[[inputs.prometheus]]
+  #name_prefix="container.azm.ms/"
+  ## An array of urls to scrape metrics from.
+  urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## An array of Kubernetes services to scrape metrics from.
+  # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
+
+  ## Kubernetes config file to create client from.
+  # kube_config = "/path/to/kubernetes.config"
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
+  # monitor_kubernetes_pods = true
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+  ## OR
+  # bearer_token_string = "abc_123"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  #tls_cert = /path/to/certfile
+  # tls_key = /path/to/keyfile
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
+  #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
+
 [[inputs.exec]]
   ## Commands array
   interval = "15m"

From e9cfdaa41cb997c3d104a5b4b456fc163759fb1a Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 21 Jun 2019 14:48:49 -0700
Subject: [PATCH 02/12] fix config

---
 installer/conf/td-agent-bit-rs.conf | 7 ++++---
 installer/conf/telegraf-rs.conf     | 4 ++--
 installer/conf/telegraf.conf        | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf
index 7945261aa..0e7218a2b 100644
--- a/installer/conf/td-agent-bit-rs.conf
+++ b/installer/conf/td-agent-bit-rs.conf
@@ -8,11 +8,12 @@
     Name tail
     Tag oms.container.log.telegraf.err.*
     Path /var/opt/microsoft/docker-cimprov/log/telegraf.log
-    DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db
-    Mem_Buf_Limit 2m
+    DB /var/opt/microsoft/docker-cimprov/state/telegraf-rs-log-state.db
+    DB.Sync Off
+    Mem_Buf_Limit 1m
     Path_Key filepath
     Skip_Long_Lines On
-    Ignore_Older 5m
+    Ignore_Older 2m
 
 [INPUT]
     Name        tcp
diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index d7e6fd16c..75ecadb34 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -75,7 +75,7 @@
   ## Run telegraf with debug log messages.
   debug = false
   ## Run telegraf in quiet mode (error log messages only).
-  quiet = true
+  quiet = false
   ## Specify the log file name. The empty string means to log to stderr.
   logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
 
@@ -545,7 +545,7 @@
   url_tag = "scrapeUrl"
 
   ## An array of Kubernetes services to scrape metrics from.
-  kubernetes_services = ["http://prometheus-operated.monitoring:9090/metrics", "https://kube-state-metrics.monitoring:9443"]
+  kubernetes_services = ["http://prometheus-operated.monitoring:9090/metrics","http://prometheus-operator.monitoring:8080/metrics", "http://prometheus-k8s.monitoring:9090/metrics","https://kube-state-metrics.monitoring:8443/metrics"]
 
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 185fea5be..2868f3c8b 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -571,7 +571,7 @@
 [[inputs.prometheus]]
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
-  urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  urls = ["http://kubelet.kube-system:10255/metrics", "http://kubelet.kube-system:10255/cadvisor", "http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics", "https://node-exporter.monitoring:9100/metrics"]
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
 
   metric_version = 2

From f2724f04d7e5e6e41e8349a9a0930e177495abad Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 21 Jun 2019 15:04:44 -0700
Subject: [PATCH 03/12] fix config after discussion

---
 installer/conf/telegraf-rs.conf | 2 +-
 installer/conf/telegraf.conf    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index 75ecadb34..abf0fef14 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -545,7 +545,7 @@
   url_tag = "scrapeUrl"
 
   ## An array of Kubernetes services to scrape metrics from.
-  kubernetes_services = ["http://prometheus-operated.monitoring:9090/metrics","http://prometheus-operator.monitoring:8080/metrics", "http://prometheus-k8s.monitoring:9090/metrics","https://kube-state-metrics.monitoring:8443/metrics"]
+  kubernetes_services = ["http://prometheus-operated.monitoring:9090/metrics","http://prometheus-operator.monitoring:8080/metrics", "http://prometheus-k8s.monitoring:9090/metrics","https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics"]
 
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 2868f3c8b..80c3dd564 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -571,14 +571,14 @@
 [[inputs.prometheus]]
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
-  urls = ["http://kubelet.kube-system:10255/metrics", "http://kubelet.kube-system:10255/cadvisor", "http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics", "https://node-exporter.monitoring:9100/metrics"]
+  #urls = ["http://kubelet.kube-system:10255/metrics", "http://kubelet.kube-system:10255/cadvisor", "http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics", "https://node-exporter.monitoring:9100/metrics"]
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
 
   metric_version = 2
   url_tag = "scrapeUrl"
 
   ## An array of Kubernetes services to scrape metrics from.
-  # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
+  kubernetes_services = ["http://kubelet.kube-system:10255/metrics", "http://kubelet.kube-system:10255/cadvisor", "http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics", "https://node-exporter.monitoring:9100/metrics"]
 
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"

From ffc7a378090623560c3a4e23bd590312db12886a Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 21 Jun 2019 15:16:35 -0700
Subject: [PATCH 04/12] fix error log to get errors

---
 installer/conf/telegraf.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 80c3dd564..226be2f43 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -75,7 +75,7 @@
   ## Run telegraf with debug log messages.
   debug = false
   ## Run telegraf in quiet mode (error log messages only).
-  quiet = true
+  quiet = false
   ## Specify the log file name. The empty string means to log to stderr.
   logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
 

From 7146253e878d72a77bd968f2206e8965e7827577 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Fri, 21 Jun 2019 18:02:10 -0700
Subject: [PATCH 05/12] fix config

---
 installer/conf/telegraf-rs.conf | 2 +-
 installer/conf/telegraf.conf    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index abf0fef14..07233cf15 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -545,7 +545,7 @@
   url_tag = "scrapeUrl"
 
   ## An array of Kubernetes services to scrape metrics from.
-  kubernetes_services = ["http://prometheus-operated.monitoring:9090/metrics","http://prometheus-operator.monitoring:8080/metrics", "http://prometheus-k8s.monitoring:9090/metrics","https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics"]
+  kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
 
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 226be2f43..eadb60377 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -571,14 +571,14 @@
 [[inputs.prometheus]]
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
-  #urls = ["http://kubelet.kube-system:10255/metrics", "http://kubelet.kube-system:10255/cadvisor", "http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics", "https://node-exporter.monitoring:9100/metrics"]
+  urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "https://$NODE_IP:9100/metrics"]
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
 
   metric_version = 2
   url_tag = "scrapeUrl"
 
   ## An array of Kubernetes services to scrape metrics from.
-  kubernetes_services = ["http://kubelet.kube-system:10255/metrics", "http://kubelet.kube-system:10255/cadvisor", "http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics", "https://node-exporter.monitoring:9100/metrics"]
+  #kubernetes_services = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "https://$NODE_IP:9100/metrics"]
 
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"

From 0172a022a608aa18ab8f117a55f1d59f334a743d Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Mon, 24 Jun 2019 16:52:49 -0700
Subject: [PATCH 06/12] update config

---
 installer/conf/telegraf-rs.conf | 14 +++++++++++++-
 installer/conf/telegraf.conf    | 13 ++++++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index 07233cf15..bde27f279 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -535,17 +535,29 @@
 #  insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 #  [inputs.prometheus.tagpass]
+
+#Prometheus Custom Metrics
 [[inputs.prometheus]]
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
   #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
+  interval: "$AZMON_RS_PROM_INTERVAL"
+  ## An array of urls to scrape metrics from.
+  urls = ["$AZMON_RS_PROM_URLS"]
+
+  kubernetes_services = ["$AZMON_RS_PROM_K8S_SERVICES"]
+  monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS
+
+
+  fieldpass = ["$AZMON_RS_PROM_FIELDPASS"]
+  fielddrop = ["$AZMON_RS_PROM_FIELDDROP"]
 
   metric_version = 2
   url_tag = "scrapeUrl"
 
   ## An array of Kubernetes services to scrape metrics from.
-  kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
+  #kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
 
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index eadb60377..fa1d72ea7 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -568,11 +568,18 @@
   insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 
+
+## prometheus custom metrics
 [[inputs.prometheus]]
-  #name_prefix="container.azm.ms/"
+
+  interval: "$AZMON_DS_PROM_INTERVAL"
+
   ## An array of urls to scrape metrics from.
-  urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "https://$NODE_IP:9100/metrics"]
-  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
+  urls = ["$AZMON_DS_PROM_URLS"]
+
+  fieldpass = ["$AZMON_DS_PROM_FIELDPASS"]
+  
+  fielddrop = ["$AZMON_DS_PROM_FIELDDROP"]
 
   metric_version = 2
   url_tag = "scrapeUrl"

From 4b213e2c44b8bcffee3900078b4d3f44848ac524 Mon Sep 17 00:00:00 2001
From: Vishwanath Narasimhan <visnara@microsoft.com>
Date: Thu, 27 Jun 2019 15:37:47 -0700
Subject: [PATCH 07/12] Add telemetry

---
 source/code/go/src/plugins/oms.go             |  4 +--
 .../code/plugin/CAdvisorMetricsAPIClient.rb   | 34 ++++++++++++++++++-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index b925e7145..301aff1ed 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -34,14 +34,12 @@ const ResourceIdEnv = "AKS_RESOURCE_ID"
 //env variable which has ResourceName for NON-AKS
 const ResourceNameEnv = "ACS_RESOURCE_NAME"
 
-// Origin prefix for telegraf Metrics (used as prefix for origin field & prefix for azure monitor specific tags)
+// Origin prefix for telegraf Metrics (used as prefix for origin field & prefix for azure monitor specific tags and also for custom-metrics telemetry )
 const TelegrafMetricOriginPrefix = "container.azm.ms"
 
 // Origin suffix for telegraf Metrics (used as suffix for origin field)
 const TelegrafMetricOriginSuffix = "telegraf"
 
-// Namespace prefix for telegraf Metrics (used as prefix for Namespace field)
-//const TelegrafMetricNamespacePrefix = "plugin"
 // clusterName tag
 const TelegrafTagClusterName = "clusterName"
 
diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb
index b842edb29..ec38bcbb5 100644
--- a/source/code/plugin/CAdvisorMetricsAPIClient.rb
+++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb
@@ -14,12 +14,31 @@ class CAdvisorMetricsAPIClient
   require_relative "ApplicationInsightsUtility"
 
   @configMapMountPath = "/etc/config/settings/log-data-collection-settings"
+  @promConfigMountPath = "/etc/config/settings/prometheus-data-collection-settings"
   @clusterEnvVarCollectionEnabled = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"]
   @clusterStdErrLogCollectionEnabled = ENV["AZMON_COLLECT_STDERR_LOGS"]
   @clusterStdOutLogCollectionEnabled = ENV["AZMON_COLLECT_STDOUT_LOGS"]
   @clusterLogTailExcludPath = ENV["AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH"]
   @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"]
   @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+
+  @rsPromInterval = ENV["TELEMETRY_RS_PROM_INTERVAL"]
+  @dsPromInterval = ENV["TELEMETRY_DS_PROM_INTERVAL"]
+  
+  @rsPromFieldPassCount = ENV["TELEMETRY_RS_PROM_FIELDPASS_LENGTH"]
+  @dsPromFieldPassCount = ENV["TELEMETRY_DS_PROM_FIELDPASS_LENGTH"]
+  
+  @rsPromFieldDropCount = ENV["TELEMETRY_RS_PROM_FIELDDROP_LENGTH"]
+  @dsPromFieldDropCount = ENV["TELEMETRY_DS_PROM_FIELDDROP_LENGTH"]
+
+  @rsPromK8sServiceCount = ENV["TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH"]
+
+  @rsPromUrlCount = ENV["TELEMETRY_RS_PROM_URLS_LENGTH"]
+  @dsPromUrlCount = ENV["TELEMETRY_DS_PROM_URLS_LENGTH"]
+
+  @rsPromMonitorPods = ENV["TELEMETRY_RS_PROM_MONITOR_PODS"]
+  
+
   @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt"
   @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M
   #   @@rxBytesLast = nil
@@ -199,7 +218,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
                     telemetryProps["PodName"] = podName
                     telemetryProps["ContainerName"] = containerName
                     telemetryProps["Computer"] = hostName
-                    #telemetry about custom log collections setting
+                    #telemetry about log collections settings
                     if (File.file?(@configMapMountPath))
                       telemetryProps["clustercustomsettings"] = true
                       telemetryProps["clusterenvvars"] = @clusterEnvVarCollectionEnabled
@@ -209,6 +228,19 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
                       telemetryProps["clusterLogTailPath"] = @clusterLogTailPath
                       telemetryProps["clusterAgentSchemaVersion"] = @clusterAgentSchemaVersion
                     end
+                    #telemetry about prometheus metric collections settings
+                    if (File.file?(@promConfigMountPath))
+                      telemetryProps["rsPromInt"] = @rsPromInterval
+                      telemetryProps["dsPromInt"] = @dsPromInterval
+                      telemetryProps["rsPromFPC"] = @rsPromFieldPassCount
+                      telemetryProps["dsPromFPC"] = @dsPromFieldPassCount
+                      telemetryProps["rsPromFDC"] = @rsPromFieldDropCount
+                      telemetryProps["dsPromFDC"] = @dsPromFieldDropCount
+                      telemetryProps["rsPromServ"] = @rsPromK8sServiceCount
+                      telemetryProps["rsPromUrl"] = @rsPromUrlCount
+                      telemetryProps["dsPromUrl"] = @dsPromUrlCount
+                      telemetryProps["rsPromMonPods"] = @rsPromMonitorPods
+                    end
                     ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps)
                   end
                 end

From 903071b411f5babe3b9296b60a8de03b9746c8d2 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 27 Jun 2019 16:07:13 -0700
Subject: [PATCH 08/12] Rashmi/promcustomconfig (#231)

* changes

* formatting changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* adding telemetry

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes
---
 installer/conf/telegraf-rs.conf               |   2 +-
 installer/conf/telegraf-test-rs.conf          | 113 +++++++++++
 installer/conf/telegraf-test.conf             | 100 ++++++++++
 installer/conf/telegraf.conf                  |   2 +-
 installer/datafiles/base_container.data       |   5 +-
 .../scripts/tomlparser-prom-customconfig.rb   | 184 ++++++++++++++++++
 installer/scripts/tomlparser.rb               |  82 ++++----
 7 files changed, 444 insertions(+), 44 deletions(-)
 create mode 100644 installer/conf/telegraf-test-rs.conf
 create mode 100644 installer/conf/telegraf-test.conf
 create mode 100644 installer/scripts/tomlparser-prom-customconfig.rb

diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index bde27f279..8e8665104 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -542,7 +542,7 @@
   ## An array of urls to scrape metrics from.
   #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
-  interval: "$AZMON_RS_PROM_INTERVAL"
+  interval = "$AZMON_RS_PROM_INTERVAL"
   ## An array of urls to scrape metrics from.
   urls = ["$AZMON_RS_PROM_URLS"]
 
diff --git a/installer/conf/telegraf-test-rs.conf b/installer/conf/telegraf-test-rs.conf
new file mode 100644
index 000000000..4ece2bf8c
--- /dev/null
+++ b/installer/conf/telegraf-test-rs.conf
@@ -0,0 +1,113 @@
+# Telegraf Configuration
+#
+# Telegraf is entirely plugin driven. All metrics are gathered from the
+# declared inputs, and sent to the declared outputs.
+#
+# Plugins must be declared in here to be active.
+# To deactivate a plugin, comment out the name and any variables.
+#
+# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
+# file would generate.
+#
+# Environment variables can be used anywhere in this config file, simply prepend
+# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
+# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
+
+# Configuration for telegraf agent
+[agent]
+  ## Default data collection interval for all inputs
+  interval = "60s"
+  ## Rounds collection interval to 'interval'
+  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+  round_interval = true
+
+  ## Telegraf will send metrics to outputs in batches of at most
+  ## metric_batch_size metrics.
+  ## This controls the size of writes that Telegraf sends to output plugins.
+  metric_batch_size = 1000
+
+  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
+  ## output, and will flush this buffer on a successful write. Oldest metrics
+  ## are dropped first when this buffer fills.
+  ## This buffer only fills when writes fail to output plugin(s).
+  metric_buffer_limit = 10000
+
+  ## Collection jitter is used to jitter the collection by a random amount.
+  ## Each plugin will sleep for a random time within jitter before collecting.
+  ## This can be used to avoid many plugins querying things like sysfs at the
+  ## same time, which can have a measurable effect on the system.
+  collection_jitter = "0s"
+
+  ## Default flushing interval for all outputs. You shouldn't set this below
+  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
+  flush_interval = "60s"
+  ## Jitter the flush interval by a random amount. This is primarily to avoid
+  ## large write spikes for users running a large number of telegraf instances.
+  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
+  flush_jitter = "0s"
+
+  ## By default or when set to "0s", precision will be set to the same
+  ## timestamp order as the collection interval, with the maximum being 1s.
+  ##   ie, when interval = "10s", precision will be "1s"
+  ##       when interval = "250ms", precision will be "1ms"
+  ## Precision will NOT be used for service inputs. It is up to each individual
+  ## service input to set the timestamp at the appropriate precision.
+  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
+  precision = ""
+
+  ## Logging configuration:
+  ## Run telegraf with debug log messages.
+  debug = false
+  ## Run telegraf in quiet mode (error log messages only).
+  quiet = false
+  ## Specify the log file name. The empty string means to log to stderr.
+  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
+
+  ## Override default hostname, if empty use os.Hostname()
+  #hostname = "placeholder_hostname"
+  ## If set to true, do no set the "host" tag in the telegraf agent.
+  omit_hostname = true
+
+
+###############################################################################
+#                            INPUT PLUGINS                                    #
+###############################################################################
+
+#Prometheus Custom Metrics
+[[inputs.prometheus]]
+  ## An array of urls to scrape metrics from.
+  interval = "$AZMON_RS_PROM_INTERVAL"
+
+  ## An array of urls to scrape metrics from.
+  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  urls = ["$AZMON_RS_PROM_URLS"]
+
+  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
+  fieldpass = ["$AZMON_RS_PROM_FIELDPASS"]
+  fielddrop = ["$AZMON_RS_PROM_FIELDDROP"]
+
+  ## An array of Kubernetes services to scrape metrics from.
+  #kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
+  kubernetes_services = ["$AZMON_RS_PROM_K8S_SERVICES"]
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
+  monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
diff --git a/installer/conf/telegraf-test.conf b/installer/conf/telegraf-test.conf
new file mode 100644
index 000000000..f1a7880ad
--- /dev/null
+++ b/installer/conf/telegraf-test.conf
@@ -0,0 +1,100 @@
+# Telegraf Configuration
+#
+# Telegraf is entirely plugin driven. All metrics are gathered from the
+# declared inputs, and sent to the declared outputs.
+#
+# Plugins must be declared in here to be active.
+# To deactivate a plugin, comment out the name and any variables.
+#
+# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
+# file would generate.
+#
+# Environment variables can be used anywhere in this config file, simply prepend
+# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
+# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
+
+# Configuration for telegraf agent
+[agent]
+  ## Default data collection interval for all inputs
+  interval = "60s"
+  ## Rounds collection interval to 'interval'
+  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+  round_interval = true
+
+  ## Telegraf will send metrics to outputs in batches of at most
+  ## metric_batch_size metrics.
+  ## This controls the size of writes that Telegraf sends to output plugins.
+  metric_batch_size = 1000
+
+  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
+  ## output, and will flush this buffer on a successful write. Oldest metrics
+  ## are dropped first when this buffer fills.
+  ## This buffer only fills when writes fail to output plugin(s).
+  metric_buffer_limit = 10000
+
+  ## Collection jitter is used to jitter the collection by a random amount.
+  ## Each plugin will sleep for a random time within jitter before collecting.
+  ## This can be used to avoid many plugins querying things like sysfs at the
+  ## same time, which can have a measurable effect on the system.
+  collection_jitter = "0s"
+
+  ## Default flushing interval for all outputs. You shouldn't set this below
+  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
+  flush_interval = "60s"
+  ## Jitter the flush interval by a random amount. This is primarily to avoid
+  ## large write spikes for users running a large number of telegraf instances.
+  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
+  flush_jitter = "0s"
+
+  ## By default or when set to "0s", precision will be set to the same
+  ## timestamp order as the collection interval, with the maximum being 1s.
+  ##   ie, when interval = "10s", precision will be "1s"
+  ##       when interval = "250ms", precision will be "1ms"
+  ## Precision will NOT be used for service inputs. It is up to each individual
+  ## service input to set the timestamp at the appropriate precision.
+  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
+  precision = ""
+
+  ## Logging configuration:
+  ## Run telegraf with debug log messages.
+  debug = false
+  ## Run telegraf in quiet mode (error log messages only).
+  quiet = false
+  ## Specify the log file name. The empty string means to log to stderr.
+  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
+
+  ## Override default hostname, if empty use os.Hostname()
+  #hostname = "placeholder_hostname"
+  ## If set to true, do no set the "host" tag in the telegraf agent.
+  omit_hostname = true
+
+
+###############################################################################
+#                            INPUT PLUGINS                                    #
+###############################################################################
+
+#Prometheus Custom Metrics
+[[inputs.prometheus]]
+  ## An array of urls to scrape metrics from.
+  interval = "$AZMON_DS_PROM_INTERVAL"
+
+  ## An array of urls to scrape metrics from.
+  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
+  urls = ["$AZMON_DS_PROM_URLS"]
+
+  fieldpass = ["$AZMON_DS_PROM_FIELDPASS"]
+  fielddrop = ["$AZMON_DS_PROM_FIELDDROP"]
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index fa1d72ea7..a83db55cf 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -572,7 +572,7 @@
 ## prometheus custom metrics
 [[inputs.prometheus]]
 
-  interval: "$AZMON_DS_PROM_INTERVAL"
+  interval = "$AZMON_DS_PROM_INTERVAL"
 
   ## An array of urls to scrape metrics from.
   urls = ["$AZMON_DS_PROM_URLS"]
diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
index 58a74aa0a..5a18805be 100644
--- a/installer/datafiles/base_container.data
+++ b/installer/datafiles/base_container.data
@@ -110,9 +110,12 @@ MAINTAINER:              'Microsoft Corporation'
 /etc/opt/microsoft/docker-cimprov/out_oms.conf;			            installer/conf/out_oms.conf;                    644; root; root
 /etc/opt/microsoft/docker-cimprov/telegraf.conf;			        installer/conf/telegraf.conf;                    644; root; root
 /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf;			        installer/conf/telegraf-rs.conf;                    644; root; root
-/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh;	        installer/scripts/TelegrafTCPErrorTelemetry.sh;      755; root; root
+/opt/telegraf-test.conf;			                                installer/conf/telegraf-test.conf;                    644; root; root
+/opt/telegraf-test-rs.conf;			                                installer/conf/telegraf-test-rs.conf;                    644; root; root
+/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh;	    installer/scripts/TelegrafTCPErrorTelemetry.sh;      755; root; root
 /opt/livenessprobe.sh;                                              installer/scripts/livenessprobe.sh;      755; root; root
 /opt/tomlparser.rb;                                                 installer/scripts/tomlparser.rb;     755; root; root 
+/opt/tomlparser-prom-customconfig.rb;                               installer/scripts/tomlparser-prom-customconfig.rb;     755; root; root 
 
 %Links
 /opt/omi/lib/libcontainer.${{SHLIB_EXT}}; /opt/microsoft/docker-cimprov/lib/libcontainer.${{SHLIB_EXT}}; 644; root; root
diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb
new file mode 100644
index 000000000..5df83c89a
--- /dev/null
+++ b/installer/scripts/tomlparser-prom-customconfig.rb
@@ -0,0 +1,184 @@
+#!/usr/local/bin/ruby
+
+require_relative "tomlrb"
+
+@promConfigMapMountPath = "/etc/config/settings/prometheus-data-collection-settings"
+@replicaset = "replicaset"
+@daemonset = "daemonset"
+@configSchemaVersion = ""
+
+# Use parser to parse the configmap toml file to a ruby structure
+def parseConfigMap
+  begin
+    # Check to see if config map is created
+    if (File.file?(@promConfigMapMountPath))
+      puts "config::configmap container-azm-ms-agentconfig for settings mounted, parsing values for prometheus config map"
+      parsedConfig = Tomlrb.load_file(@promConfigMapMountPath, symbolize_keys: true)
+      puts "config::Successfully parsed mounted prometheus config map"
+      return parsedConfig
+    else
+      puts "config::configmap container-azm-ms-agentconfig for settings not mounted, using defaults for prometheus scraping"
+      return nil
+    end
+  rescue => errorStr
+    puts "config::error::Exception while parsing toml config file for prometheus config: #{errorStr}, using defaults"
+    return nil
+  end
+end
+
+def checkForTypeArray(arrayValue, arrayType)
+  if !arrayValue.nil? && arrayValue.kind_of?(Array) && arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType)
+    return true
+  else
+    return false
+  end
+end
+
+def checkForType(variable, varType)
+  if !variable.nil? && variable.kind_of?(varType)
+    return true
+  else
+    return false
+  end
+end
+
+# Use the ruby structure created after config parsing to set the right values to be used as environment variables
+def populateSettingValuesFromConfigMap(parsedConfig)
+  # Checking to see if this is the daemonset or replicaset to parse config accordingly
+  controller = ENV["CONTROLLER_TYPE"]
+  if !controller.nil?
+    if !parsedConfig.nil? && !parsedConfig[:prometheus_data_collection_settings].nil?
+      if controller.casecmp(@replicaset) == 0 && !parsedConfig[:prometheus_data_collection_settings][:cluster].nil?
+        #Get prometheus replicaset custom config settings
+        begin
+          interval = parsedConfig[:prometheus_data_collection_settings][:cluster][:interval]
+          fieldPass = parsedConfig[:prometheus_data_collection_settings][:cluster][:fieldpass]
+          fieldDrop = parsedConfig[:prometheus_data_collection_settings][:cluster][:fielddrop]
+          urls = parsedConfig[:prometheus_data_collection_settings][:cluster][:urls]
+          kubernetesServices = parsedConfig[:prometheus_data_collection_settings][:cluster][:kubernetes_services]
+          monitorKubernetesPods = parsedConfig[:prometheus_data_collection_settings][:cluster][:monitor_kubernetes_pods]
+
+          # Check for the right datattypes to enforce right setting values
+          if checkForType(interval, String) &&
+             checkForTypeArray(fieldPass, String) &&
+             checkForTypeArray(fieldDrop, String) &&
+             checkForTypeArray(kubernetesServices, String) &&
+             checkForTypeArray(urls, String) &&
+             !monitorKubernetesPods.nil? && (!!monitorKubernetesPods == monitorKubernetesPods) #Checking for Boolean type, since 'Boolean' is not defined as a type in ruby
+            puts "config::Successfully passed typecheck for config settings for replicaset"
+            # Write the settings to file, so that they can be set as environment variables
+            file = File.open("prom_config_env_var", "w")
+            if !file.nil?
+              file.write("export AZMON_RS_PROM_INTERVAL=#{interval}\n")
+              file.write("export TELEMETRY_RS_PROM_INTERVAL=\"#{interval}\"\n")
+              file.write("export AZMON_RS_PROM_FIELDPASS=\"#{fieldPass.join("\",\"")}\"\n")
+              #Setting array lengths as environment variables for telemetry purposes
+              file.write("export TELEMETRY_RS_PROM_FIELDPASS_LENGTH=\"#{fieldPass.length}\"\n")
+              file.write("export AZMON_RS_PROM_FIELDDROP=#{fieldDrop.join("\",\"")}\n")
+              file.write("export TELEMETRY_RS_PROM_FIELDDROP_LENGTH=\"#{fieldDrop.length}\"\n")
+              file.write("export AZMON_RS_PROM_K8S_SERVICES=#{kubernetesServices.join("\",\"")}\n")
+              file.write("export TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH=#{kubernetesServices.length}\n")
+              file.write("export AZMON_RS_PROM_URLS=#{urls.join("\",\"")}\n")
+              file.write("export TELEMETRY_RS_PROM_URLS_LENGTH=#{urls.length}\n")
+              file.write("export AZMON_RS_PROM_MONITOR_PODS=#{monitorKubernetesPods}\n")
+              file.write("export TELEMETRY_RS_PROM_MONITOR_PODS=\"#{monitorKubernetesPods}\"\n")
+              # Close file after writing all environment variables
+              file.close
+              puts "config::Successfully created custom config environment variable file for replicaset"
+
+              #Also substitute these values in the test config file for telegraf
+              file_name = "telegraf-test-rs.conf"
+              text = File.read(file_name)
+              new_contents = text.gsub("$AZMON_RS_PROM_INTERVAL", interval)
+              new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDPASS", fieldPass.join("\",\""))
+              new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDDROP", fieldDrop.join("\",\""))
+              new_contents = new_contents.gsub("$AZMON_RS_PROM_URLS", urls.join("\",\""))
+              new_contents = new_contents.gsub("$AZMON_RS_PROM_K8S_SERVICES", kubernetesServices.join("\",\""))
+              new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", (monitorKubernetesPods ? "true" : "false"))
+
+              File.open(file_name, "w") { |file| file.puts new_contents }
+              puts "config::Successfully replaced the settings in test telegraf config file for replicaset"
+            else
+              puts "config::error::Exception while opening file for writing prometheus replicaset config environment variables"
+              puts "****************End Prometheus Config Processing********************"
+            end
+          else
+            puts "config::Typecheck failed for prometheus config settings for replicaset, using defaults"
+          end # end of type check condition
+        rescue => errorStr
+          puts "config::error::Exception while reading config file for prometheus config for replicaset: #{errorStr}, using defaults"
+          puts "****************End Prometheus Config Processing********************"
+        end
+      elsif controller.casecmp(@daemonset) == 0 && !parsedConfig[:prometheus_data_collection_settings][:node].nil?
+        #Get prometheus daemonset custom config settings
+        begin
+          interval = parsedConfig[:prometheus_data_collection_settings][:node][:interval]
+          fieldPass = parsedConfig[:prometheus_data_collection_settings][:node][:fieldpass]
+          fieldDrop = parsedConfig[:prometheus_data_collection_settings][:node][:fielddrop]
+          urls = parsedConfig[:prometheus_data_collection_settings][:node][:urls]
+
+          # Check for the right datattypes to enforce right setting values
+          if checkForType(interval, String) &&
+             checkForTypeArray(fieldPass, String) &&
+             checkForTypeArray(fieldDrop, String) &&
+             checkForTypeArray(urls, String)
+            puts "config::Successfully passed typecheck for config settings for daemonset"
+            # Write the settings to file, so that they can be set as environment variables
+            file = File.open("prom_config_env_var", "w")
+            if !file.nil?
+              file.write("export AZMON_DS_PROM_INTERVAL=#{interval}\n")
+              file.write("export TELEMETRY_DS_PROM_INTERVAL=\"#{interval}\"\n")
+              file.write("export AZMON_DS_PROM_FIELDPASS=\"#{fieldPass.join("\",\"")}\"\n")
+              #Setting array lengths as environment variables for telemetry purposes
+              file.write("export TELEMETRY_DS_PROM_FIELDPASS_LENGTH=\"#{fieldPass.length}\"\n")
+              file.write("export AZMON_DS_PROM_FIELDDROP=#{fieldDrop.join("\",\"")}\n")
+              file.write("export TELEMETRY_DS_PROM_FIELDDROP_LENGTH=\"#{fieldDrop.length}\"\n")
+              file.write("export AZMON_DS_PROM_URLS=#{urls.join("\",\"")}\n")
+              file.write("export TELEMETRY_DS_PROM_URLS_LENGTH=#{urls.length}\n")
+              # Close file after writing all environment variables
+              file.close
+              puts "config::Successfully created custom config environment variable file for daemonset"
+
+              #Also substitute these values in the test config file for telegraf
+              file_name = "telegraf-test.conf"
+              text = File.read(file_name)
+              new_contents = text.gsub("$AZMON_DS_PROM_INTERVAL", interval)
+              new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDPASS", fieldPass.join("\",\""))
+              new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDDROP", fieldDrop.join("\",\""))
+              new_contents = new_contents.gsub("$AZMON_DS_PROM_URLS", urls.join("\",\""))
+              # To write changes to the file, use:
+              File.open(file_name, "w") { |file| file.puts new_contents }
+              puts "config::Successfully replaced the settings in test telegraf config file for daemonset"
+            else
+              puts "config::error::Exception while opening file for writing prometheus daemonset config environment variables"
+              puts "****************End Prometheus Config Processing********************"
+            end
+          else
+            puts "config::Typecheck failed for prometheus config settings for daemonset, using defaults"
+          end # end of type check condition
+        rescue => errorStr
+          puts "config::error::Exception while reading config file for prometheus config for daemonset: #{errorStr}, using defaults"
+          puts "****************End Prometheus Config Processing********************"
+        end
+      end # end of controller type check
+    end
+  else
+    puts "config::error:: Controller undefined while processing prometheus config, using defaults"
+  end
+end
+
+@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+puts "****************Start Prometheus Config Processing********************"
+if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
+  configMapSettings = parseConfigMap
+  if !configMapSettings.nil?
+    populateSettingValuesFromConfigMap(configMapSettings)
+  end
+else
+  if (File.file?(@promConfigMapMountPath))
+    puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults"
+  else
+    puts "config::No configmap mounted for prometheus custom config, using defaults"
+  end
+end
+puts "****************End Prometheus Config Processing********************"
diff --git a/installer/scripts/tomlparser.rb b/installer/scripts/tomlparser.rb
index 3e7f48045..c72e64127 100644
--- a/installer/scripts/tomlparser.rb
+++ b/installer/scripts/tomlparser.rb
@@ -82,7 +82,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
         if @collectStderrLogs && !stderrNamespaces.nil?
           if stderrNamespaces.kind_of?(Array)
             if !@stdoutExcludeNamespaces.nil? && !@stdoutExcludeNamespaces.empty?
-              stdoutNamespaces = @stdoutExcludeNamespaces.split(',')
+              stdoutNamespaces = @stdoutExcludeNamespaces.split(",")
             end
             # Checking only for the first element to be string because toml enforces the arrays to contain elements of same type
             if stderrNamespaces.length > 0 && stderrNamespaces[0].kind_of?(String)
@@ -119,47 +119,47 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   end
 end
 
-  @configSchemaVersion = ENV['AZMON_AGENT_CFG_SCHEMA_VERSION']
-  puts "****************Start Config Processing********************"
-  if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp('v1') == 0 #note v1 is the only supported schema version , so hardcoding it
-    configMapSettings = parseConfigMap
-    if !configMapSettings.nil?
-      populateSettingValuesFromConfigMap(configMapSettings)
-    end
-  else
-    if (File.file?(@configMapMountPath))
-      puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults"
-    end 
-    @excludePath = "*_kube-system_*.log"
+@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+puts "****************Start Config Processing********************"
+if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
+  configMapSettings = parseConfigMap
+  if !configMapSettings.nil?
+    populateSettingValuesFromConfigMap(configMapSettings)
+  end
+else
+  if (File.file?(@configMapMountPath))
+    puts "config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults"
   end
+  @excludePath = "*_kube-system_*.log"
+end
 
-  # Write the settings to file, so that they can be set as environment variables
-  file = File.open("config_env_var", "w")
+# Write the settings to file, so that they can be set as environment variables
+file = File.open("config_env_var", "w")
 
-  if !file.nil?
-    # This will be used in td-agent-bit.conf file to filter out logs
-    if (!@collectStdoutLogs && !@collectStderrLogs)
-      #Stop log tailing completely
-      @logTailPath = "/opt/nolog*.log"
-      @logExclusionRegexPattern = "stdout|stderr"
-    elsif !@collectStdoutLogs
-      @logExclusionRegexPattern = "stdout"
-    elsif !@collectStderrLogs
-      @logExclusionRegexPattern = "stderr"
-    end
-    file.write("export AZMON_COLLECT_STDOUT_LOGS=#{@collectStdoutLogs}\n")
-    file.write("export AZMON_LOG_TAIL_PATH=#{@logTailPath}\n")
-    file.write("export AZMON_LOG_EXCLUSION_REGEX_PATTERN=\"#{@logExclusionRegexPattern}\"\n")
-    file.write("export AZMON_STDOUT_EXCLUDED_NAMESPACES=#{@stdoutExcludeNamespaces}\n")
-    file.write("export AZMON_COLLECT_STDERR_LOGS=#{@collectStderrLogs}\n")
-    file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n")
-    file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n")
-    file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n")
-    # Close file after writing all environment variables
-    file.close
-    puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' "
-    puts "****************End Config Processing********************"
-  else
-    puts "config::error::Exception while opening file for writing config environment variables"
-    puts "****************End Config Processing********************"
+if !file.nil?
+  # This will be used in td-agent-bit.conf file to filter out logs
+  if (!@collectStdoutLogs && !@collectStderrLogs)
+    #Stop log tailing completely
+    @logTailPath = "/opt/nolog*.log"
+    @logExclusionRegexPattern = "stdout|stderr"
+  elsif !@collectStdoutLogs
+    @logExclusionRegexPattern = "stdout"
+  elsif !@collectStderrLogs
+    @logExclusionRegexPattern = "stderr"
   end
+  file.write("export AZMON_COLLECT_STDOUT_LOGS=#{@collectStdoutLogs}\n")
+  file.write("export AZMON_LOG_TAIL_PATH=#{@logTailPath}\n")
+  file.write("export AZMON_LOG_EXCLUSION_REGEX_PATTERN=\"#{@logExclusionRegexPattern}\"\n")
+  file.write("export AZMON_STDOUT_EXCLUDED_NAMESPACES=#{@stdoutExcludeNamespaces}\n")
+  file.write("export AZMON_COLLECT_STDERR_LOGS=#{@collectStderrLogs}\n")
+  file.write("export AZMON_STDERR_EXCLUDED_NAMESPACES=#{@stderrExcludeNamespaces}\n")
+  file.write("export AZMON_CLUSTER_COLLECT_ENV_VAR=#{@collectClusterEnvVariables}\n")
+  file.write("export AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH=#{@excludePath}\n")
+  # Close file after writing all environment variables
+  file.close
+  puts "Both stdout & stderr log collection are turned off for namespaces: '#{@excludePath}' "
+  puts "****************End Config Processing********************"
+else
+  puts "config::error::Exception while opening file for writing config environment variables"
+  puts "****************End Config Processing********************"
+end

From d12c7df322f6b62263475e9df38e8a1e61429ffb Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Tue, 9 Jul 2019 11:52:30 -0700
Subject: [PATCH 09/12] Rashmi/promcustomconfig (#236)

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* changes

* fix exceptions

* changes to remove some exceptions

* exception fixes
---
 installer/conf/td-agent-bit-rs.conf           |  11 -
 installer/conf/td-agent-bit.conf              |  22 +-
 installer/conf/telegraf-rs.conf               |  69 ++--
 installer/conf/telegraf-test-rs.conf          | 113 ------
 installer/conf/telegraf-test.conf             | 100 ------
 installer/conf/telegraf.conf                  |  67 ++--
 installer/datafiles/base_container.data       |   2 -
 .../scripts/tomlparser-prom-customconfig.rb   | 116 ++++---
 source/code/go/src/plugins/oms.go             |  20 +-
 source/code/go/src/plugins/out_oms.go         |   2 -
 source/code/plugin/DockerApiClient.rb         | 325 +++++++++---------
 source/code/plugin/KubernetesApiClient.rb     |   2 +-
 source/code/plugin/in_containerinventory.rb   |   5 +-
 source/code/plugin/in_kube_events.rb          | 138 ++++----
 source/code/plugin/in_kube_nodes.rb           | 134 ++++----
 source/code/plugin/in_kube_podinventory.rb    |  14 +-
 source/code/plugin/in_kube_services.rb        | 191 +++++-----
 17 files changed, 562 insertions(+), 769 deletions(-)
 delete mode 100644 installer/conf/telegraf-test-rs.conf
 delete mode 100644 installer/conf/telegraf-test.conf

diff --git a/installer/conf/td-agent-bit-rs.conf b/installer/conf/td-agent-bit-rs.conf
index 0e7218a2b..7839b0eee 100644
--- a/installer/conf/td-agent-bit-rs.conf
+++ b/installer/conf/td-agent-bit-rs.conf
@@ -4,17 +4,6 @@
     Parsers_File  /etc/td-agent-bit/parsers.conf
     Log_File      /var/opt/microsoft/docker-cimprov/log/fluent-bit.log
 
-[INPUT]
-    Name tail
-    Tag oms.container.log.telegraf.err.*
-    Path /var/opt/microsoft/docker-cimprov/log/telegraf.log
-    DB /var/opt/microsoft/docker-cimprov/state/telegraf-rs-log-state.db
-    DB.Sync Off
-    Mem_Buf_Limit 1m
-    Path_Key filepath
-    Skip_Long_Lines On
-    Ignore_Older 2m
-
 [INPUT]
     Name        tcp
     Tag oms.container.perf.telegraf.*
diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf
index 2dee26234..e7aabd242 100644
--- a/installer/conf/td-agent-bit.conf
+++ b/installer/conf/td-agent-bit.conf
@@ -6,7 +6,7 @@
 
 [INPUT]
     Name tail
-    Tag oms.container.log.*
+    Tag oms.container.log.la.*
     Path ${AZMON_LOG_TAIL_PATH}
     DB /var/log/omsagent-fblogs.db
     DB.Sync Off
@@ -32,17 +32,6 @@
     Skip_Long_Lines On
     Ignore_Older 2m
 
-[INPUT]
-    Name tail
-    Tag oms.container.log.telegraf.err.*
-    Path /var/opt/microsoft/docker-cimprov/log/telegraf.log
-    DB /var/opt/microsoft/docker-cimprov/state/telegraf-log-state.db
-    DB.Sync Off
-    Mem_Buf_Limit 1m
-    Path_Key filepath
-    Skip_Long_Lines On
-    Ignore_Older 2m
-
 [INPUT]
     Name        tcp
     Tag oms.container.perf.telegraf.*
@@ -53,9 +42,16 @@
 
 [FILTER]
     Name grep
-    Match oms.container.log.*
+    Match oms.container.log.la.*
     Exclude stream ${AZMON_LOG_EXCLUSION_REGEX_PATTERN}
 
+# Exclude prometheus plugin exceptions that might be caused due to invalid config.(Logs which contain - E! [inputs.prometheus])
+# Excluding these logs from being sent to AI since it can result in high volume of data in telemetry due to invalid config.
+[FILTER]
+    Name grep
+    Match oms.container.log.flbplugin.*
+    Exclude log E! [\[]inputs.prometheus[\]]
+
 [OUTPUT]
     Name                            oms
     EnableTelemetry                 true
diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index 8e8665104..53aa03620 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -75,9 +75,9 @@
   ## Run telegraf with debug log messages.
   debug = false
   ## Run telegraf in quiet mode (error log messages only).
-  quiet = false
+  quiet = true
   ## Specify the log file name. The empty string means to log to stderr.
-  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
+  logfile = ""
 
   ## Override default hostname, if empty use os.Hostname()
   #hostname = "placeholder_hostname"
@@ -544,14 +544,13 @@
   #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
   interval = "$AZMON_RS_PROM_INTERVAL"
   ## An array of urls to scrape metrics from.
-  urls = ["$AZMON_RS_PROM_URLS"]
+  urls = $AZMON_RS_PROM_URLS
 
-  kubernetes_services = ["$AZMON_RS_PROM_K8S_SERVICES"]
+  kubernetes_services = $AZMON_RS_PROM_K8S_SERVICES
   monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS
 
-
-  fieldpass = ["$AZMON_RS_PROM_FIELDPASS"]
-  fielddrop = ["$AZMON_RS_PROM_FIELDDROP"]
+  fieldpass = $AZMON_RS_PROM_FIELDPASS
+  fielddrop = $AZMON_RS_PROM_FIELDDROP
 
   metric_version = 2
   url_tag = "scrapeUrl"
@@ -586,32 +585,32 @@
   insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 
-[[inputs.exec]]
-  ## Commands array
-  interval = "15m"
-  commands = [
-    "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh"
-  ]
-
-  ## Timeout for each command to complete.
-  timeout = "15s"
-
-  ## measurement name suffix (for separating different commands)
-  name_suffix = "_telemetry"
-
-  ## Data format to consume.
-  ## Each data format has its own unique set of configuration options, read
-  ## more about them here:
-  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
-  data_format = "influx"
-  #tagexclude = ["hostName"]
-  [inputs.exec.tags]
-    AgentVersion = "$AGENT_VERSION"
-    AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID"
-    ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME"
-    Region = "$TELEMETRY_AKS_REGION"
-    ClusterName = "$TELEMETRY_CLUSTER_NAME"
-    ClusterType = "$TELEMETRY_CLUSTER_TYPE"
-    Computer = "placeholder_hostname"
-    ControllerType = "$CONTROLLER_TYPE"
+# [[inputs.exec]]
+#   ## Commands array
+#   interval = "15m"
+#   commands = [
+#     "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh"
+#   ]
+
+#   ## Timeout for each command to complete.
+#   timeout = "15s"
+
+#   ## measurement name suffix (for separating different commands)
+#   name_suffix = "_telemetry"
+
+#   ## Data format to consume.
+#   ## Each data format has its own unique set of configuration options, read
+#   ## more about them here:
+#   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+#   data_format = "influx"
+#   #tagexclude = ["hostName"]
+#   [inputs.exec.tags]
+#     AgentVersion = "$AGENT_VERSION"
+#     AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID"
+#     ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME"
+#     Region = "$TELEMETRY_AKS_REGION"
+#     ClusterName = "$TELEMETRY_CLUSTER_NAME"
+#     ClusterType = "$TELEMETRY_CLUSTER_TYPE"
+#     Computer = "placeholder_hostname"
+#     ControllerType = "$CONTROLLER_TYPE"
 
diff --git a/installer/conf/telegraf-test-rs.conf b/installer/conf/telegraf-test-rs.conf
deleted file mode 100644
index 4ece2bf8c..000000000
--- a/installer/conf/telegraf-test-rs.conf
+++ /dev/null
@@ -1,113 +0,0 @@
-# Telegraf Configuration
-#
-# Telegraf is entirely plugin driven. All metrics are gathered from the
-# declared inputs, and sent to the declared outputs.
-#
-# Plugins must be declared in here to be active.
-# To deactivate a plugin, comment out the name and any variables.
-#
-# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
-# file would generate.
-#
-# Environment variables can be used anywhere in this config file, simply prepend
-# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
-# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
-
-# Configuration for telegraf agent
-[agent]
-  ## Default data collection interval for all inputs
-  interval = "60s"
-  ## Rounds collection interval to 'interval'
-  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
-  round_interval = true
-
-  ## Telegraf will send metrics to outputs in batches of at most
-  ## metric_batch_size metrics.
-  ## This controls the size of writes that Telegraf sends to output plugins.
-  metric_batch_size = 1000
-
-  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
-  ## output, and will flush this buffer on a successful write. Oldest metrics
-  ## are dropped first when this buffer fills.
-  ## This buffer only fills when writes fail to output plugin(s).
-  metric_buffer_limit = 10000
-
-  ## Collection jitter is used to jitter the collection by a random amount.
-  ## Each plugin will sleep for a random time within jitter before collecting.
-  ## This can be used to avoid many plugins querying things like sysfs at the
-  ## same time, which can have a measurable effect on the system.
-  collection_jitter = "0s"
-
-  ## Default flushing interval for all outputs. You shouldn't set this below
-  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
-  flush_interval = "60s"
-  ## Jitter the flush interval by a random amount. This is primarily to avoid
-  ## large write spikes for users running a large number of telegraf instances.
-  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
-  flush_jitter = "0s"
-
-  ## By default or when set to "0s", precision will be set to the same
-  ## timestamp order as the collection interval, with the maximum being 1s.
-  ##   ie, when interval = "10s", precision will be "1s"
-  ##       when interval = "250ms", precision will be "1ms"
-  ## Precision will NOT be used for service inputs. It is up to each individual
-  ## service input to set the timestamp at the appropriate precision.
-  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
-  precision = ""
-
-  ## Logging configuration:
-  ## Run telegraf with debug log messages.
-  debug = false
-  ## Run telegraf in quiet mode (error log messages only).
-  quiet = false
-  ## Specify the log file name. The empty string means to log to stderr.
-  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
-
-  ## Override default hostname, if empty use os.Hostname()
-  #hostname = "placeholder_hostname"
-  ## If set to true, do no set the "host" tag in the telegraf agent.
-  omit_hostname = true
-
-
-###############################################################################
-#                            INPUT PLUGINS                                    #
-###############################################################################
-
-#Prometheus Custom Metrics
-[[inputs.prometheus]]
-  ## An array of urls to scrape metrics from.
-  interval = "$AZMON_RS_PROM_INTERVAL"
-
-  ## An array of urls to scrape metrics from.
-  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
-  urls = ["$AZMON_RS_PROM_URLS"]
-
-  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
-  fieldpass = ["$AZMON_RS_PROM_FIELDPASS"]
-  fielddrop = ["$AZMON_RS_PROM_FIELDDROP"]
-
-  ## An array of Kubernetes services to scrape metrics from.
-  #kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
-  kubernetes_services = ["$AZMON_RS_PROM_K8S_SERVICES"]
-
-  ## Scrape Kubernetes pods for the following prometheus annotations:
-  ## - prometheus.io/scrape: Enable scraping for this pod
-  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
-  ##     set this to `https` & most likely set the tls config.
-  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
-  ## - prometheus.io/port: If port is not 9102 use this annotation
-  monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS
-
-  metric_version = 2
-  url_tag = "scrapeUrl"
-
-  ## Use bearer token for authorization. ('bearer_token' takes priority)
-  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
-
-  ## Specify timeout duration for slower prometheus clients (default is 3s)
-  response_timeout = "15s"
-
-  ## Optional TLS Config
-  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
-  ## Use TLS but skip chain & host verification
-  insecure_skip_verify = true
diff --git a/installer/conf/telegraf-test.conf b/installer/conf/telegraf-test.conf
deleted file mode 100644
index f1a7880ad..000000000
--- a/installer/conf/telegraf-test.conf
+++ /dev/null
@@ -1,100 +0,0 @@
-# Telegraf Configuration
-#
-# Telegraf is entirely plugin driven. All metrics are gathered from the
-# declared inputs, and sent to the declared outputs.
-#
-# Plugins must be declared in here to be active.
-# To deactivate a plugin, comment out the name and any variables.
-#
-# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
-# file would generate.
-#
-# Environment variables can be used anywhere in this config file, simply prepend
-# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
-# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)
-
-# Configuration for telegraf agent
-[agent]
-  ## Default data collection interval for all inputs
-  interval = "60s"
-  ## Rounds collection interval to 'interval'
-  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
-  round_interval = true
-
-  ## Telegraf will send metrics to outputs in batches of at most
-  ## metric_batch_size metrics.
-  ## This controls the size of writes that Telegraf sends to output plugins.
-  metric_batch_size = 1000
-
-  ## For failed writes, telegraf will cache metric_buffer_limit metrics for each
-  ## output, and will flush this buffer on a successful write. Oldest metrics
-  ## are dropped first when this buffer fills.
-  ## This buffer only fills when writes fail to output plugin(s).
-  metric_buffer_limit = 10000
-
-  ## Collection jitter is used to jitter the collection by a random amount.
-  ## Each plugin will sleep for a random time within jitter before collecting.
-  ## This can be used to avoid many plugins querying things like sysfs at the
-  ## same time, which can have a measurable effect on the system.
-  collection_jitter = "0s"
-
-  ## Default flushing interval for all outputs. You shouldn't set this below
-  ## interval. Maximum flush_interval will be flush_interval + flush_jitter
-  flush_interval = "60s"
-  ## Jitter the flush interval by a random amount. This is primarily to avoid
-  ## large write spikes for users running a large number of telegraf instances.
-  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
-  flush_jitter = "0s"
-
-  ## By default or when set to "0s", precision will be set to the same
-  ## timestamp order as the collection interval, with the maximum being 1s.
-  ##   ie, when interval = "10s", precision will be "1s"
-  ##       when interval = "250ms", precision will be "1ms"
-  ## Precision will NOT be used for service inputs. It is up to each individual
-  ## service input to set the timestamp at the appropriate precision.
-  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
-  precision = ""
-
-  ## Logging configuration:
-  ## Run telegraf with debug log messages.
-  debug = false
-  ## Run telegraf in quiet mode (error log messages only).
-  quiet = false
-  ## Specify the log file name. The empty string means to log to stderr.
-  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
-
-  ## Override default hostname, if empty use os.Hostname()
-  #hostname = "placeholder_hostname"
-  ## If set to true, do no set the "host" tag in the telegraf agent.
-  omit_hostname = true
-
-
-###############################################################################
-#                            INPUT PLUGINS                                    #
-###############################################################################
-
-#Prometheus Custom Metrics
-[[inputs.prometheus]]
-  ## An array of urls to scrape metrics from.
-  interval = "$AZMON_DS_PROM_INTERVAL"
-
-  ## An array of urls to scrape metrics from.
-  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
-  urls = ["$AZMON_DS_PROM_URLS"]
-
-  fieldpass = ["$AZMON_DS_PROM_FIELDPASS"]
-  fielddrop = ["$AZMON_DS_PROM_FIELDDROP"]
-
-  metric_version = 2
-  url_tag = "scrapeUrl"
-
-  ## Use bearer token for authorization. ('bearer_token' takes priority)
-  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
-
-  ## Specify timeout duration for slower prometheus clients (default is 3s)
-  response_timeout = "15s"
-
-  ## Optional TLS Config
-  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
-  ## Use TLS but skip chain & host verification
-  insecure_skip_verify = true
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index a83db55cf..47e71c5f5 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -75,10 +75,9 @@
   ## Run telegraf with debug log messages.
   debug = false
   ## Run telegraf in quiet mode (error log messages only).
-  quiet = false
+  quiet = true
   ## Specify the log file name. The empty string means to log to stderr.
-  logfile = "/var/opt/microsoft/docker-cimprov/log/telegraf.log"
-
+  logfile = ""
   ## Override default hostname, if empty use os.Hostname()
   #hostname = "placeholder_hostname"
   ## If set to true, do no set the "host" tag in the telegraf agent.
@@ -575,11 +574,11 @@
   interval = "$AZMON_DS_PROM_INTERVAL"
 
   ## An array of urls to scrape metrics from.
-  urls = ["$AZMON_DS_PROM_URLS"]
+  urls = $AZMON_DS_PROM_URLS
 
-  fieldpass = ["$AZMON_DS_PROM_FIELDPASS"]
+  fieldpass = $AZMON_DS_PROM_FIELDPASS
   
-  fielddrop = ["$AZMON_DS_PROM_FIELDDROP"]
+  fielddrop = $AZMON_DS_PROM_FIELDDROP
 
   metric_version = 2
   url_tag = "scrapeUrl"
@@ -614,31 +613,31 @@
   insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 
-[[inputs.exec]]
-  ## Commands array
-  interval = "15m"
-  commands = [
-    "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh"
-  ]
-
-  ## Timeout for each command to complete.
-  timeout = "15s"
-
-  ## measurement name suffix (for separating different commands)
-  name_suffix = "_telemetry"
-
-  ## Data format to consume.
-  ## Each data format has its own unique set of configuration options, read
-  ## more about them here:
-  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
-  data_format = "influx"
-  tagexclude = ["hostName"]
-  [inputs.exec.tags]
-    AgentVersion = "$AGENT_VERSION"
-    AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID"
-    ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME"
-    Region = "$TELEMETRY_AKS_REGION"
-    ClusterName = "$TELEMETRY_CLUSTER_NAME"
-    ClusterType = "$TELEMETRY_CLUSTER_TYPE"
-    Computer = "placeholder_hostname"
-    ControllerType = "$CONTROLLER_TYPE"
\ No newline at end of file
+# [[inputs.exec]]
+#   ## Commands array
+#   interval = "15m"
+#   commands = [
+#     "/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh"
+#   ]
+
+#   ## Timeout for each command to complete.
+#   timeout = "15s"
+
+#   ## measurement name suffix (for separating different commands)
+#   name_suffix = "_telemetry"
+
+#   ## Data format to consume.
+#   ## Each data format has its own unique set of configuration options, read
+#   ## more about them here:
+#   ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+#   data_format = "influx"
+#   tagexclude = ["hostName"]
+#   [inputs.exec.tags]
+#     AgentVersion = "$AGENT_VERSION"
+#     AKS_RESOURCE_ID = "$TELEMETRY_AKS_RESOURCE_ID"
+#     ACS_RESOURCE_NAME = "$TELEMETRY_ACS_RESOURCE_NAME"
+#     Region = "$TELEMETRY_AKS_REGION"
+#     ClusterName = "$TELEMETRY_CLUSTER_NAME"
+#     ClusterType = "$TELEMETRY_CLUSTER_TYPE"
+#     Computer = "placeholder_hostname"
+#     ControllerType = "$CONTROLLER_TYPE"
\ No newline at end of file
diff --git a/installer/datafiles/base_container.data b/installer/datafiles/base_container.data
index 5a18805be..fe1635335 100644
--- a/installer/datafiles/base_container.data
+++ b/installer/datafiles/base_container.data
@@ -110,8 +110,6 @@ MAINTAINER:              'Microsoft Corporation'
 /etc/opt/microsoft/docker-cimprov/out_oms.conf;			            installer/conf/out_oms.conf;                    644; root; root
 /etc/opt/microsoft/docker-cimprov/telegraf.conf;			        installer/conf/telegraf.conf;                    644; root; root
 /etc/opt/microsoft/docker-cimprov/telegraf-rs.conf;			        installer/conf/telegraf-rs.conf;                    644; root; root
-/opt/telegraf-test.conf;			                                installer/conf/telegraf-test.conf;                    644; root; root
-/opt/telegraf-test-rs.conf;			                                installer/conf/telegraf-test-rs.conf;                    644; root; root
 /opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh;	    installer/scripts/TelegrafTCPErrorTelemetry.sh;      755; root; root
 /opt/livenessprobe.sh;                                              installer/scripts/livenessprobe.sh;      755; root; root
 /opt/tomlparser.rb;                                                 installer/scripts/tomlparser.rb;     755; root; root 
diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb
index 5df83c89a..d9fdf1cc2 100644
--- a/installer/scripts/tomlparser-prom-customconfig.rb
+++ b/installer/scripts/tomlparser-prom-customconfig.rb
@@ -1,11 +1,22 @@
 #!/usr/local/bin/ruby
 
 require_relative "tomlrb"
+require "fileutils"
 
 @promConfigMapMountPath = "/etc/config/settings/prometheus-data-collection-settings"
 @replicaset = "replicaset"
 @daemonset = "daemonset"
 @configSchemaVersion = ""
+@defaultDsInterval = "1m"
+@defaultDsPromUrls = []
+@defaultDsFieldPass = []
+@defaultDsFieldDrop = []
+@defaultRsInterval = "1m"
+@defaultRsPromUrls = []
+@defaultRsFieldPass = []
+@defaultRsFieldDrop = []
+@defaultRsK8sServices = []
+@defaultRsMonitorPods = false
 
 # Use parser to parse the configmap toml file to a ruby structure
 def parseConfigMap
@@ -27,7 +38,7 @@ def parseConfigMap
 end
 
 def checkForTypeArray(arrayValue, arrayType)
-  if !arrayValue.nil? && arrayValue.kind_of?(Array) && arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType)
+  if (arrayValue.nil? || (arrayValue.kind_of?(Array) && (arrayValue.empty? || arrayValue[0].kind_of?(arrayType))))
     return true
   else
     return false
@@ -35,7 +46,7 @@ def checkForTypeArray(arrayValue, arrayType)
 end
 
 def checkForType(variable, varType)
-  if !variable.nil? && variable.kind_of?(varType)
+  if variable.nil? || variable.kind_of?(varType)
     return true
   else
     return false
@@ -66,47 +77,49 @@ def populateSettingValuesFromConfigMap(parsedConfig)
              checkForTypeArray(urls, String) &&
              !monitorKubernetesPods.nil? && (!!monitorKubernetesPods == monitorKubernetesPods) #Checking for Boolean type, since 'Boolean' is not defined as a type in ruby
             puts "config::Successfully passed typecheck for config settings for replicaset"
-            # Write the settings to file, so that they can be set as environment variables
-            file = File.open("prom_config_env_var", "w")
+            #if setting is nil assign default values
+            interval = (interval.nil?) ? @defaultRsInterval : interval
+            fieldPass = (fieldPass.nil?) ? @defaultRsFieldPass : fieldPass
+            fieldDrop = (fieldDrop.nil?) ? @defaultRsFieldDrop : fieldDrop
+            kubernetesServices = (kubernetesServices.nil?) ? @defaultRsK8sServices : kubernetesServices
+            urls = (urls.nil?) ? @defaultRsPromUrls : urls
+            monitorKubernetesPods = (monitorKubernetesPods.nil?) ? @defaultRsMonitorPods : monitorKubernetesPods
+
+            file_name = "/opt/telegraf-test-rs.conf"
+            # Copy the telegraf config file to a temp file to run telegraf in test mode with this config
+            FileUtils.cp("/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf", file_name)
+
+            puts "config::Starting to substitute the placeholders in telegraf conf copy file for replicaset"
+            #Replace the placeholder config values with values from custom config
+            text = File.read(file_name)
+            new_contents = text.gsub("$AZMON_RS_PROM_INTERVAL", interval)
+            new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDPASS", ((fieldPass.length > 0) ? ("[\"" + fieldPass.join("\",\"") + "\"]") : "[]"))
+            new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDDROP", ((fieldDrop.length > 0) ? ("[\"" + fieldDrop.join("\",\"") + "\"]") : "[]"))
+            new_contents = new_contents.gsub("$AZMON_RS_PROM_URLS", ((urls.length > 0) ? ("[\"" + urls.join("\",\"") + "\"]") : "[]"))
+            new_contents = new_contents.gsub("$AZMON_RS_PROM_K8S_SERVICES", ((kubernetesServices.length > 0) ? ("[\"" + kubernetesServices.join("\",\"") + "\"]") : "[]"))
+            new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", (monitorKubernetesPods ? "true" : "false"))
+            File.open(file_name, "w") { |file| file.puts new_contents }
+            puts "config::Successfully substituted the placeholders in telegraf conf file for replicaset"
+            #Set environment variables for telemetry
+            file = File.open("telemetry_prom_config_env_var", "w")
             if !file.nil?
-              file.write("export AZMON_RS_PROM_INTERVAL=#{interval}\n")
               file.write("export TELEMETRY_RS_PROM_INTERVAL=\"#{interval}\"\n")
-              file.write("export AZMON_RS_PROM_FIELDPASS=\"#{fieldPass.join("\",\"")}\"\n")
               #Setting array lengths as environment variables for telemetry purposes
               file.write("export TELEMETRY_RS_PROM_FIELDPASS_LENGTH=\"#{fieldPass.length}\"\n")
-              file.write("export AZMON_RS_PROM_FIELDDROP=#{fieldDrop.join("\",\"")}\n")
               file.write("export TELEMETRY_RS_PROM_FIELDDROP_LENGTH=\"#{fieldDrop.length}\"\n")
-              file.write("export AZMON_RS_PROM_K8S_SERVICES=#{kubernetesServices.join("\",\"")}\n")
               file.write("export TELEMETRY_RS_PROM_K8S_SERVICES_LENGTH=#{kubernetesServices.length}\n")
-              file.write("export AZMON_RS_PROM_URLS=#{urls.join("\",\"")}\n")
               file.write("export TELEMETRY_RS_PROM_URLS_LENGTH=#{urls.length}\n")
-              file.write("export AZMON_RS_PROM_MONITOR_PODS=#{monitorKubernetesPods}\n")
               file.write("export TELEMETRY_RS_PROM_MONITOR_PODS=\"#{monitorKubernetesPods}\"\n")
               # Close file after writing all environment variables
               file.close
-              puts "config::Successfully created custom config environment variable file for replicaset"
-
-              #Also substitute these values in the test config file for telegraf
-              file_name = "telegraf-test-rs.conf"
-              text = File.read(file_name)
-              new_contents = text.gsub("$AZMON_RS_PROM_INTERVAL", interval)
-              new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDPASS", fieldPass.join("\",\""))
-              new_contents = new_contents.gsub("$AZMON_RS_PROM_FIELDDROP", fieldDrop.join("\",\""))
-              new_contents = new_contents.gsub("$AZMON_RS_PROM_URLS", urls.join("\",\""))
-              new_contents = new_contents.gsub("$AZMON_RS_PROM_K8S_SERVICES", kubernetesServices.join("\",\""))
-              new_contents = new_contents.gsub("$AZMON_RS_PROM_MONITOR_PODS", (monitorKubernetesPods ? "true" : "false"))
-
-              File.open(file_name, "w") { |file| file.puts new_contents }
-              puts "config::Successfully replaced the settings in test telegraf config file for replicaset"
-            else
-              puts "config::error::Exception while opening file for writing prometheus replicaset config environment variables"
-              puts "****************End Prometheus Config Processing********************"
+              puts "config::Successfully created telemetry file for replicaset"
             end
           else
             puts "config::Typecheck failed for prometheus config settings for replicaset, using defaults"
           end # end of type check condition
         rescue => errorStr
-          puts "config::error::Exception while reading config file for prometheus config for replicaset: #{errorStr}, using defaults"
+          puts "config::error::Exception while parsing config file for prometheus config for replicaset: #{errorStr}, using defaults"
+          setRsPromDefaults
           puts "****************End Prometheus Config Processing********************"
         end
       elsif controller.casecmp(@daemonset) == 0 && !parsedConfig[:prometheus_data_collection_settings][:node].nil?
@@ -123,41 +136,44 @@ def populateSettingValuesFromConfigMap(parsedConfig)
              checkForTypeArray(fieldDrop, String) &&
              checkForTypeArray(urls, String)
             puts "config::Successfully passed typecheck for config settings for daemonset"
-            # Write the settings to file, so that they can be set as environment variables
-            file = File.open("prom_config_env_var", "w")
+
+            #if setting is nil assign default values
+            interval = (interval.nil?) ? @defaultDsInterval : interval
+            fieldPass = (fieldPass.nil?) ? @defaultDsFieldPass : fieldPass
+            fieldDrop = (fieldDrop.nil?) ? @defaultDsFieldDrop : fieldDrop
+            urls = (urls.nil?) ? @defaultDsPromUrls : urls
+
+            file_name = "/opt/telegraf-test.conf"
+            # Copy the telegraf config file to a temp file to run telegraf in test mode with this config
+            FileUtils.cp("/etc/opt/microsoft/docker-cimprov/telegraf.conf", file_name)
+
+            puts "config::Starting to substitute the placeholders in telegraf conf copy file for daemonset"
+            #Replace the placeholder config values with values from custom config
+            text = File.read(file_name)
+            new_contents = text.gsub("$AZMON_DS_PROM_INTERVAL", interval)
+            new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDPASS", ((fieldPass.length > 0) ? ("[\"" + fieldPass.join("\",\"") + "\"]") : "[]"))
+            new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDDROP", ((fieldDrop.length > 0) ? ("[\"" + fieldDrop.join("\",\"") + "\"]") : "[]"))
+            new_contents = new_contents.gsub("$AZMON_DS_PROM_URLS", ((urls.length > 0) ? ("[\"" + urls.join("\",\"") + "\"]") : "[]"))
+            File.open(file_name, "w") { |file| file.puts new_contents }
+            puts "config::Successfully substituted the placeholders in telegraf conf file for daemonset"
+
+            #Set environment variables for telemetry
+            file = File.open("telemetry_prom_config_env_var", "w")
             if !file.nil?
-              file.write("export AZMON_DS_PROM_INTERVAL=#{interval}\n")
               file.write("export TELEMETRY_DS_PROM_INTERVAL=\"#{interval}\"\n")
-              file.write("export AZMON_DS_PROM_FIELDPASS=\"#{fieldPass.join("\",\"")}\"\n")
               #Setting array lengths as environment variables for telemetry purposes
               file.write("export TELEMETRY_DS_PROM_FIELDPASS_LENGTH=\"#{fieldPass.length}\"\n")
-              file.write("export AZMON_DS_PROM_FIELDDROP=#{fieldDrop.join("\",\"")}\n")
               file.write("export TELEMETRY_DS_PROM_FIELDDROP_LENGTH=\"#{fieldDrop.length}\"\n")
-              file.write("export AZMON_DS_PROM_URLS=#{urls.join("\",\"")}\n")
               file.write("export TELEMETRY_DS_PROM_URLS_LENGTH=#{urls.length}\n")
               # Close file after writing all environment variables
               file.close
-              puts "config::Successfully created custom config environment variable file for daemonset"
-
-              #Also substitute these values in the test config file for telegraf
-              file_name = "telegraf-test.conf"
-              text = File.read(file_name)
-              new_contents = text.gsub("$AZMON_DS_PROM_INTERVAL", interval)
-              new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDPASS", fieldPass.join("\",\""))
-              new_contents = new_contents.gsub("$AZMON_DS_PROM_FIELDDROP", fieldDrop.join("\",\""))
-              new_contents = new_contents.gsub("$AZMON_DS_PROM_URLS", urls.join("\",\""))
-              # To write changes to the file, use:
-              File.open(file_name, "w") { |file| file.puts new_contents }
-              puts "config::Successfully replaced the settings in test telegraf config file for daemonset"
-            else
-              puts "config::error::Exception while opening file for writing prometheus daemonset config environment variables"
-              puts "****************End Prometheus Config Processing********************"
+              puts "config::Successfully created telemetry file for daemonset"
             end
           else
             puts "config::Typecheck failed for prometheus config settings for daemonset, using defaults"
           end # end of type check condition
         rescue => errorStr
-          puts "config::error::Exception while reading config file for prometheus config for daemonset: #{errorStr}, using defaults"
+          puts "config::error::Exception while parsing config file for prometheus config for daemonset: #{errorStr}, using defaults"
           puts "****************End Prometheus Config Processing********************"
         end
       end # end of controller type check
diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go
index 301aff1ed..319ff3551 100644
--- a/source/code/go/src/plugins/oms.go
+++ b/source/code/go/src/plugins/oms.go
@@ -191,7 +191,6 @@ func updateContainerImageNameMaps() {
 		if err != nil {
 			message := fmt.Sprintf("Error getting pods %s\nIt is ok to log here and continue, because the logs will be missing image and Name, but the logs will still have the containerID", err.Error())
 			Log(message)
-			SendException(message)
 			continue
 		}
 
@@ -224,7 +223,7 @@ func populateExcludedStdoutNamespaces() {
 	if (strings.Compare(collectStdoutLogs, "true") == 0) && (len(excludeList) > 0) {
 		stdoutNSExcludeList = strings.Split(excludeList, ",")
 		for _, ns := range stdoutNSExcludeList {
-			Log ("Excluding namespace %s for stdout log collection", ns)
+			Log("Excluding namespace %s for stdout log collection", ns)
 			StdoutIgnoreNsSet[strings.TrimSpace(ns)] = true
 		}
 	}
@@ -237,7 +236,7 @@ func populateExcludedStderrNamespaces() {
 	if (strings.Compare(collectStderrLogs, "true") == 0) && (len(excludeList) > 0) {
 		stderrNSExcludeList = strings.Split(excludeList, ",")
 		for _, ns := range stderrNSExcludeList {
-			Log ("Excluding namespace %s for stderr log collection", ns)
+			Log("Excluding namespace %s for stderr log collection", ns)
 			StderrIgnoreNsSet[strings.TrimSpace(ns)] = true
 		}
 	}
@@ -382,7 +381,6 @@ func PostTelegrafMetricsToLA(telegrafRecords []map[interface{}]interface{}) int
 	if err != nil {
 		message := fmt.Sprintf("PostTelegrafMetricsToLA::Error:(retriable) when sending %v metrics. duration:%v err:%q \n", len(laMetrics), elapsed, err.Error())
 		Log(message)
-		SendException(message)
 		UpdateNumTelegrafMetricsSentTelemetry(0, 1)
 		return output.FLB_RETRY
 	}
@@ -423,7 +421,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 	nameIDMap := make(map[string]string)
 
 	DataUpdateMutex.Lock()
-	
+
 	for k, v := range ImageIDMap {
 		imageIDMap[k] = v
 	}
@@ -515,7 +513,8 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 		if err != nil {
 			message := fmt.Sprintf("Error when sending request %s \n", err.Error())
 			Log(message)
-			SendException(message)
+			// Commenting this out for now. TODO - Add better telemetry for ods errors using aggregation
+			//SendException(message)
 			Log("Failed to flush %d records after %s", len(dataItems), elapsed)
 
 			return output.FLB_RETRY
@@ -559,7 +558,7 @@ func GetContainerIDK8sNamespaceFromFileName(filename string) (string, string) {
 
 	start := strings.LastIndex(filename, "-")
 	end := strings.LastIndex(filename, ".")
-	
+
 	if start >= end || start == -1 || end == -1 {
 		id = ""
 	} else {
@@ -639,7 +638,6 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 	Log("containerInventoryRefreshInterval = %d \n", containerInventoryRefreshInterval)
 	ContainerImageNameRefreshTicker = time.NewTicker(time.Second * time.Duration(containerInventoryRefreshInterval))
 
-
 	// Populate Computer field
 	containerHostName, err := ioutil.ReadFile(pluginConfig["container_host_file_path"])
 	if err != nil {
@@ -678,11 +676,11 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 
 	CreateHTTPClient()
 
-  	if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 {
+	if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 {
 		populateExcludedStdoutNamespaces()
 		populateExcludedStderrNamespaces()
-		go updateContainerImageNameMaps()		
-  	} else {
+		go updateContainerImageNameMaps()
+	} else {
 		Log("Running in replicaset. Disabling container enrichment caching & updates \n")
 	}
 }
diff --git a/source/code/go/src/plugins/out_oms.go b/source/code/go/src/plugins/out_oms.go
index 0fa2ddd4b..e9e7124b7 100644
--- a/source/code/go/src/plugins/out_oms.go
+++ b/source/code/go/src/plugins/out_oms.go
@@ -64,8 +64,6 @@ func FLBPluginFlush(data unsafe.Pointer, length C.int, tag *C.char) int {
 		return PushToAppInsightsTraces(records, appinsights.Information, incomingTag)
 	} else if strings.Contains(incomingTag, "oms.container.perf.telegraf") {
 		return PostTelegrafMetricsToLA(records)
-	} else if strings.Contains(incomingTag, "oms.container.log.telegraf.err") {
-		return PushToAppInsightsTraces(records, appinsights.Error, incomingTag)
 	}
 
 	return PostDataHelper(records)
diff --git a/source/code/plugin/DockerApiClient.rb b/source/code/plugin/DockerApiClient.rb
index 5a46b5fdb..eb9d74531 100644
--- a/source/code/plugin/DockerApiClient.rb
+++ b/source/code/plugin/DockerApiClient.rb
@@ -2,179 +2,196 @@
 # frozen_string_literal: true
 
 class DockerApiClient
+  require "socket"
+  require "json"
+  require "timeout"
+  require_relative "omslog"
+  require_relative "DockerApiRestHelper"
+  require_relative "ApplicationInsightsUtility"
 
-    require 'socket'
-    require 'json'
-    require 'timeout'
-    require_relative 'omslog'
-    require_relative 'DockerApiRestHelper'
-    require_relative 'ApplicationInsightsUtility'
+  @@SocketPath = "/var/run/host/docker.sock"
+  @@ChunkSize = 4096
+  @@TimeoutInSeconds = 5
+  @@PluginName = "ContainerInventory"
 
-    @@SocketPath = "/var/run/host/docker.sock"
-    @@ChunkSize = 4096
-    @@TimeoutInSeconds = 5
-    @@PluginName = 'ContainerInventory'
+  def initialize
+  end
 
-    def initialize
-    end
-
-    class << self
-        # Make docker socket call for requests
-        def getResponse(request, isMultiJson, isVersion)
-            begin
-                socket = UNIXSocket.new(@@SocketPath)
-                dockerResponse = ""
-                isTimeOut = false
-                socket.write(request)
-                # iterate through the response until the last chunk is less than the chunk size so that we can read all data in socket.
-                loop do
-                    begin
-                        responseChunk = ""
-                        timeout(@@TimeoutInSeconds) do
-                            responseChunk = socket.recv(@@ChunkSize)
-                        end
-                        dockerResponse += responseChunk
-                    rescue Timeout::Error
-                        $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}")
-                        isTimeOut = true
-                        break
-                    end
-                    break if (isVersion)? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n")
-                end
-                socket.close
-                return (isTimeOut)? nil : parseResponse(dockerResponse, isMultiJson)
-            rescue => errorStr
-                $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
-                ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+  class << self
+    # Make docker socket call for requests
+    def getResponse(request, isMultiJson, isVersion)
+      begin
+        socket = UNIXSocket.new(@@SocketPath)
+        dockerResponse = ""
+        isTimeOut = false
+        socket.write(request)
+        # iterate through the response until the last chunk is less than the chunk size so that we can read all data in socket.
+        loop do
+          begin
+            responseChunk = ""
+            timeout(@@TimeoutInSeconds) do
+              responseChunk = socket.recv(@@ChunkSize)
             end
+            dockerResponse += responseChunk
+          rescue Timeout::Error
+            $log.warn("Socket read timedout for request: #{request} @ #{Time.now.utc.iso8601}")
+            isTimeOut = true
+            break
+          end
+          break if (isVersion) ? (responseChunk.length < @@ChunkSize) : (responseChunk.end_with? "0\r\n\r\n")
         end
+        socket.close
+        return (isTimeOut) ? nil : parseResponse(dockerResponse, isMultiJson)
+      rescue => errorStr
+        $log.warn("Socket call failed for request: #{request} error: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+    end
 
-        def parseResponse(dockerResponse, isMultiJson)
-            # Doing this because the response is in the raw format and includes headers.
-            # Need to do a regex match to extract the json part of the response - Anything between [{}] in response
-            parsedJsonResponse = nil
-            begin
-                jsonResponse = isMultiJson ? dockerResponse[/\[{.+}\]/] : dockerResponse[/{.+}/]
-            rescue => errorStr
-                $log.warn("Regex match for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
-            end
-            begin
-                if jsonResponse != nil
-                    parsedJsonResponse = JSON.parse(jsonResponse)
-                end
-            rescue => errorStr
-                $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
-                ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-            end 
-            return parsedJsonResponse
-        end 
+    def parseResponse(dockerResponse, isMultiJson)
+      # Doing this because the response is in the raw format and includes headers.
+      # Need to do a regex match to extract the json part of the response - Anything between [{}] in response
+      parsedJsonResponse = nil
+      begin
+        jsonResponse = isMultiJson ? dockerResponse[/\[{.+}\]/] : dockerResponse[/{.+}/]
+      rescue => errorStr
+        $log.warn("Regex match for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
+      end
+      begin
+        if jsonResponse != nil
+          parsedJsonResponse = JSON.parse(jsonResponse)
+        end
+      rescue => errorStr
+        $log.warn("Json parsing for docker response failed: #{errorStr} , isMultiJson: #{isMultiJson} @ #{Time.now.utc.iso8601}")
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+      return parsedJsonResponse
+    end
 
+    def getDockerHostName()
+      dockerHostName = ""
+      request = DockerApiRestHelper.restDockerInfo
+      response = getResponse(request, false, false)
+      if (response != nil)
+        dockerHostName = response["Name"]
+      end
+      return dockerHostName
+    end
 
-        def getDockerHostName()
-            dockerHostName = ""
-            request = DockerApiRestHelper.restDockerInfo
-            response = getResponse(request, false, false)
-            if (response != nil)
-                dockerHostName = response['Name']
+    def listContainers()
+      ids = [] # ids of the containers that pass the label checks below
+      request = DockerApiRestHelper.restDockerPs
+      containers = getResponse(request, true, false)
+      if !containers.nil? && !containers.empty?
+        containers.each do |container|
+          labels = (!container["Labels"].nil?) ? container["Labels"] : container["labels"] # "Labels" is tried first, "labels" is the fallback
+          if !labels.nil?
+            labelKeys = labels.keys
+            dockerTypeLabel = labelKeys.find { |k| "io.kubernetes.docker.type".downcase == k.downcase }
+            if !dockerTypeLabel.nil? # containers without the docker type label are skipped as well
+              dockerTypeLabelValue = labels[dockerTypeLabel]
+              # The 'io.kubernetes.docker.type' label value "podsandbox" (compared case-insensitively) marks the pause-amd64 containers, which are excluded
+              if !(dockerTypeLabelValue.downcase == "podsandbox".downcase)
+                # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that
+                # are created in the pods for ContainerInventory
+                keyValue = labelKeys.find { |k| "io.kubernetes.pod.uid".downcase == k.downcase }
+                if !labels[keyValue].nil?
+                  ids.push(container["Id"])
+                end
+              end
             end
-            return dockerHostName
+          end
         end
+      end
+      return ids
+    end
 
-        def listContainers()
-            ids = []
-            request = DockerApiRestHelper.restDockerPs
-            containers = getResponse(request, true, false)
-            if !containers.nil? && !containers.empty?
-                containers.each do |container|
-                    labels = (!container['Labels'].nil?)? container['Labels'] : container['labels']
-                    if !labels.nil?
-                        labelKeys = labels.keys
-                        dockerTypeLabel = labelKeys.find {|k| 'io.kubernetes.docker.type'.downcase == k.downcase}
-                        if !dockerTypeLabel.nil?
-                            dockerTypeLabelValue = labels[dockerTypeLabel]
-                            # Checking for 'io.kubernetes.docker.type' label for docker containers to exclude the pause-amd64 containers
-                            if !(dockerTypeLabelValue.downcase == "podsandbox".downcase)
-                                # Case insensitive lookup for pod uid label - This is to exclude containers created using docker run and only include containers that 
-                                # are created in the pods for ContainerInventory
-                                keyValue = labelKeys.find {|k| 'io.kubernetes.pod.uid'.downcase == k.downcase}
-                                if !labels[keyValue].nil?
-                                    ids.push(container['Id'])
-                                end
-                            end
-                        end
-                    end
-                end
-            end
-            return ids
+    # Splits an image reference into an array - repository, image, tag, repodigest-imageid.
+    # tagValue is a repo tag of the format repository/image:imagetag, digestValue a repo digest of the format
+    # repo@sha256:imageid; pass "" for one that is not available. Parts that cannot be extracted stay "".
+    # Exceptions are logged as warnings and the parts collected so far are returned.
+    def getImageRepositoryImageTag(tagValue, digestValue)
+      result = ["", "", "", ""]
+      atLocation = nil
+      begin
+        if !digestValue.empty?
+          # everything after the "@" is kept as the image id
+          atLocation = digestValue.index("@")
+          result[3] = digestValue[(atLocation + 1)..-1] unless atLocation.nil?
         end
 
-        # This method splits the tag value into an array - repository, image and tag
-        def getImageRepositoryImageTag(tagValue)
-            result = ["", "", ""]
-            begin
-                if !tagValue.empty?
-                    # Find delimiters in the string of format repository/image:imagetag
-                    slashLocation = tagValue.index('/')
-                    colonLocation = tagValue.index(':')
-                    if !colonLocation.nil?
-                        if slashLocation.nil?
-                            # image:imagetag
-                            result[1] = tagValue[0..(colonLocation-1)]
-                        else
-                            # repository/image:imagetag
-                            result[0] = tagValue[0..(slashLocation-1)]
-                            result[1] = tagValue[(slashLocation + 1)..(colonLocation - 1)]
-                        end
-                        result[2] = tagValue[(colonLocation + 1)..-1]
-                    end
-                end
-            rescue => errorStr
-                $log.warn("Exception at getImageRepositoryImageTag: #{errorStr} @ #{Time.now.utc.iso8601}")
+        if !tagValue.empty?
+          # Find delimiters in the string of format repository/image:imagetag
+          slashLocation = tagValue.index("/")
+          # The tag colon is searched for after the first slash so that a registry port (host:5000/image:tag) is not mistaken for it
+          colonLocation = tagValue.index(":", slashLocation.nil? ? 0 : slashLocation)
+          if !colonLocation.nil?
+            if slashLocation.nil?
+              # image:imagetag
+              result[1] = tagValue[0...colonLocation]
+            else
+              # repository/image:imagetag
+              result[0] = tagValue[0...slashLocation]
+              result[1] = tagValue[(slashLocation + 1)...colonLocation]
             end
-            return result
+            result[2] = tagValue[(colonLocation + 1)..-1]
+          end
+        elsif !digestValue.empty?
+          # Getting repo information from repodigests when repotags is empty
+          result[0] = digestValue[0...atLocation] unless atLocation.nil?
         end
+      rescue => errorStr
+        $log.warn("Exception at getImageRepositoryImageTag: #{errorStr} @ #{Time.now.utc.iso8601}")
+      end
+      return result
+    end
 
-        # Image is in the format repository/image:imagetag - This method creates a hash of image id and repository, image and tag
-        def getImageIdMap()
-            result = nil
-            begin
-                request = DockerApiRestHelper.restDockerImages
-                images = getResponse(request, true, false)
-                if !images.nil? && !images.empty?
-                    result = {}
-                    images.each do |image|
-                        tagValue = ""
-                        tags = image['RepoTags']
-                        if !tags.nil? && tags.kind_of?(Array) && tags.length > 0
-                            tagValue = tags[0]
-                        end
-                        idValue = image['Id']
-                        if !idValue.nil?
-                            result[idValue] = getImageRepositoryImageTag(tagValue)
-                        end
-                    end
-                end
-            rescue => errorStr
-                $log.warn("Exception at getImageIdMap: #{errorStr} @ #{Time.now.utc.iso8601}")
+    # Image is in the format repository/image:imagetag - This method creates a hash of image id => [repository, image, tag, repodigest-imageid] (nil if no images are listed)
+    def getImageIdMap()
+      result = nil
+      begin
+        request = DockerApiRestHelper.restDockerImages
+        images = getResponse(request, true, false)
+        if !images.nil? && !images.empty?
+          result = {}
+          images.each do |image|
+            tagValue = ""
+            tags = image["RepoTags"]
+            if !tags.nil? && tags.kind_of?(Array) && tags.length > 0
+              tagValue = tags[0] # only the first repo tag is used
+            end
+            digestValue = ""
+            digests = image["RepoDigests"]
+            if !digests.nil? && digests.kind_of?(Array) && digests.length > 0
+              digestValue = digests[0] # only the first repo digest is used
+            end
+            idValue = image["Id"]
+            if !idValue.nil?
+              result[idValue] = getImageRepositoryImageTag(tagValue, digestValue)
             end
-            return result
+          end
         end
+      rescue => errorStr
+        $log.warn("Exception at getImageIdMap: #{errorStr} @ #{Time.now.utc.iso8601}")
+      end
+      return result
+    end
 
-        def dockerInspectContainer(id)
-            request = DockerApiRestHelper.restDockerInspect(id)
-            return getResponse(request, false, false)
-        end
+    # Builds the docker inspect request for the given container id and returns getResponse's result for it.
+    def dockerInspectContainer(id)
+      getResponse(DockerApiRestHelper.restDockerInspect(id), false, false)
+    end
 
-        # This method returns docker version and docker api version for telemetry
-        def dockerInfo()
-            request = DockerApiRestHelper.restDockerVersion
-            response = getResponse(request, false, true)
-            dockerInfo = {}
-            if (response != nil)
-                dockerInfo['Version'] = response['Version']
-                dockerInfo['ApiVersion'] = response['ApiVersion']
-            end
-            return dockerInfo
-        end
+    # Returns the docker version and docker api version for telemetry as a hash with the keys
+    # "Version" and "ApiVersion"; the hash is empty when getResponse returns nil.
+    def dockerInfo()
+      versionRequest = DockerApiRestHelper.restDockerVersion
+      versionResponse = getResponse(versionRequest, false, true)
+      versionInfo = {}
+      return versionInfo if versionResponse.nil?
+      versionInfo["Version"] = versionResponse["Version"]
+      versionInfo["ApiVersion"] = versionResponse["ApiVersion"]
+      return versionInfo
     end
+  end
 end
diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb
index 3c6b4f203..58a276cfd 100644
--- a/source/code/plugin/KubernetesApiClient.rb
+++ b/source/code/plugin/KubernetesApiClient.rb
@@ -57,7 +57,7 @@ def getKubeResourceInfo(resource)
       rescue => error
         @Log.warn("kubernetes api request failed: #{error} for #{resource} @ #{Time.now.utc.iso8601}")
       end
-      if (response.body.empty?)
+      if (!response.nil? && !response.body.nil? && response.body.empty?)
         @Log.warn("KubernetesAPIClient::getKubeResourceInfo : Got empty response from Kube API for #{resource} @ #{Time.now.utc.iso8601}")
       end
       return response
diff --git a/source/code/plugin/in_containerinventory.rb b/source/code/plugin/in_containerinventory.rb
index 05e5bc9ea..4392de280 100644
--- a/source/code/plugin/in_containerinventory.rb
+++ b/source/code/plugin/in_containerinventory.rb
@@ -170,12 +170,13 @@ def inspectContainer(id, nameMap, clusterCollectEnvironmentVar)
           end
           imageValue = container["Image"]
           if !imageValue.nil? && !imageValue.empty?
-            containerInstance["ImageId"] = imageValue
             repoImageTagArray = nameMap[imageValue]
             if nameMap.has_key? imageValue
               containerInstance["Repository"] = repoImageTagArray[0]
               containerInstance["Image"] = repoImageTagArray[1]
               containerInstance["ImageTag"] = repoImageTagArray[2]
+              # Setting the image id to the id in the remote repository
+              containerInstance["ImageId"] = repoImageTagArray[3]
             end
           end
           obtainContainerConfig(containerInstance, container, clusterCollectEnvironmentVar)
@@ -200,7 +201,7 @@ def enumerate
         if !containerIds.empty?
           eventStream = MultiEventStream.new
           nameMap = DockerApiClient.getImageIdMap
-          clusterCollectEnvironmentVar = ENV['AZMON_CLUSTER_COLLECT_ENV_VAR']
+          clusterCollectEnvironmentVar = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"]
           if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? && clusterCollectEnvironmentVar.casecmp("false") == 0
             $log.warn("Environment Variable collection disabled for cluster")
           end
diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb
index 309dd8034..3a0e04c67 100644
--- a/source/code/plugin/in_kube_events.rb
+++ b/source/code/plugin/in_kube_events.rb
@@ -2,27 +2,25 @@
 # frozen_string_literal: true
 
 module Fluent
-
   class Kube_Event_Input < Input
-    Plugin.register_input('kubeevents', self)
+    Plugin.register_input("kubeevents", self)
 
     @@KubeEventsStateFile = "/var/opt/microsoft/docker-cimprov/state/KubeEventQueryState.yaml"
 
     def initialize
       super
-      require 'json'
-
-      require_relative 'KubernetesApiClient'
-      require_relative 'oms_common'
-      require_relative 'omslog'
-      require_relative 'ApplicationInsightsUtility'
+      require "json" # JSON.parse is called in enumerate
 
+      require_relative "KubernetesApiClient" # enumerate reads events, cluster name and cluster id through it
+      require_relative "oms_common" # presumably defines OMS::Common (get_hostname is called in enumerate) - TODO confirm
+      require_relative "omslog"
+      require_relative "ApplicationInsightsUtility" # exception telemetry in the rescue handlers
     end
 
-    config_param :run_interval, :time, :default => '1m'
+    config_param :run_interval, :time, :default => "1m"
     config_param :tag, :string, :default => "oms.containerinsights.KubeEvents"
 
-    def configure (conf)
+    def configure(conf) # no plugin-specific settings: the bare super forwards conf to the parent class
       super
     end
 
@@ -46,63 +44,62 @@ def shutdown
     end
 
     def enumerate(eventList = nil)
-        currentTime = Time.now
-        emitTime = currentTime.to_f
-        batchTime = currentTime.utc.iso8601
-          if eventList.nil?
-            $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}")
-            events = JSON.parse(KubernetesApiClient.getKubeResourceInfo('events').body)
-            $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
-          else
-            events = eventList
+      currentTime = Time.now
+      emitTime = currentTime.to_f
+      batchTime = currentTime.utc.iso8601
+      if eventList.nil?
+        $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}")
+        events = JSON.parse(KubernetesApiClient.getKubeResourceInfo("events").body)
+        $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
+      else
+        events = eventList
+      end
+      eventQueryState = getEventQueryState
+      newEventQueryState = []
+      begin
+        if (!events.empty? && !events["items"].nil?)
+          eventStream = MultiEventStream.new
+          events["items"].each do |items|
+            record = {}
+            #<BUGBUG> - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion
+            record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+            eventId = items["metadata"]["uid"] + "/" + items["count"].to_s
+            newEventQueryState.push(eventId)
+            if !eventQueryState.empty? && eventQueryState.include?(eventId)
+              next
+            end
+            record["ObjectKind"] = items["involvedObject"]["kind"]
+            record["Namespace"] = items["involvedObject"]["namespace"]
+            record["Name"] = items["involvedObject"]["name"]
+            record["Reason"] = items["reason"]
+            record["Message"] = items["message"]
+            record["Type"] = items["type"]
+            record["TimeGenerated"] = items["metadata"]["creationTimestamp"]
+            record["SourceComponent"] = items["source"]["component"]
+            record["FirstSeen"] = items["firstTimestamp"]
+            record["LastSeen"] = items["lastTimestamp"]
+            record["Count"] = items["count"]
+            if items["source"].key?("host")
+              record["Computer"] = items["source"]["host"]
+            else
+              record["Computer"] = (OMS::Common.get_hostname)
+            end
+            record["ClusterName"] = KubernetesApiClient.getClusterName
+            record["ClusterId"] = KubernetesApiClient.getClusterId
+            wrapper = {
+              "DataType" => "KUBE_EVENTS_BLOB",
+              "IPName" => "ContainerInsights",
+              "DataItems" => [record.each { |k, v| record[k] = v }],
+            }
+            eventStream.add(emitTime, wrapper) if wrapper
           end
-          eventQueryState = getEventQueryState
-          newEventQueryState = []
-          begin
-            if(!events.empty?)
-              eventStream = MultiEventStream.new
-              events['items'].each do |items|
-                record = {}
-                #<BUGBUG> - Not sure if ingestion has the below mapping for this custom type. Fix it as part of fixed type conversion
-                record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated
-                eventId = items['metadata']['uid'] + "/" + items['count'].to_s  
-                newEventQueryState.push(eventId)
-                if !eventQueryState.empty? && eventQueryState.include?(eventId)
-                  next
-                end  
-                record['ObjectKind']= items['involvedObject']['kind']
-                record['Namespace'] = items['involvedObject']['namespace']
-                record['Name'] = items['involvedObject']['name']
-                record['Reason'] = items['reason']
-                record['Message'] = items['message']
-                record['Type'] = items['type']
-                record['TimeGenerated'] = items['metadata']['creationTimestamp']
-                record['SourceComponent'] = items['source']['component']
-                record['FirstSeen'] = items['firstTimestamp']
-                record['LastSeen'] = items['lastTimestamp']
-                record['Count'] = items['count']
-                if items['source'].key?('host')
-                        record['Computer'] = items['source']['host']
-                else
-                        record['Computer'] = (OMS::Common.get_hostname)
-                end
-                record['ClusterName'] = KubernetesApiClient.getClusterName
-                record['ClusterId'] = KubernetesApiClient.getClusterId
-                wrapper = {
-                  "DataType"=>"KUBE_EVENTS_BLOB",
-                  "IPName"=>"ContainerInsights",
-                  "DataItems"=>[record.each{|k,v| record[k]=v}]
-                }
-                eventStream.add(emitTime, wrapper) if wrapper
-              end
-              router.emit_stream(@tag, eventStream) if eventStream
-            end  
-            writeEventQueryState(newEventQueryState)
-          rescue  => errorStr
-            $log.warn line.dump, error: errorStr.to_s
-            $log.debug_backtrace(errorStr.backtrace)
-            ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-          end   
+          router.emit_stream(@tag, eventStream) if eventStream
+        end
+        writeEventQueryState(newEventQueryState)
+      rescue => errorStr
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
     end
 
     def run_periodic
@@ -135,7 +132,7 @@ def getEventQueryState
             eventQueryState.push(line.chomp) #puts will append newline which needs to be removed
           end
         end
-      rescue  => errorStr
+      rescue => errorStr
         $log.warn $log.warn line.dump, error: errorStr.to_s
         $log.debug_backtrace(errorStr.backtrace)
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
@@ -145,20 +142,17 @@ def getEventQueryState
 
     def writeEventQueryState(eventQueryState)
       begin
-        if(!eventQueryState.nil? && !eventQueryState.empty?)
+        if (!eventQueryState.nil? && !eventQueryState.empty?) # a nil or empty state leaves the existing state file untouched
           # No need to close file handle (f) due to block scope
           File.open(@@KubeEventsStateFile, "w") do |f|
             f.puts(eventQueryState)
           end
         end
-      rescue  => errorStr
+      rescue => errorStr
-        $log.warn $log.warn line.dump, error: errorStr.to_s
+        $log.warn("in_kube_events::writeEventQueryState : Failed to write event query state: #{errorStr} @ #{Time.now.utc.iso8601}")
         $log.debug_backtrace(errorStr.backtrace)
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
     end
-
   end # Kube_Event_Input
-
 end # module
-
diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb
index aabda441e..0310fa419 100644
--- a/source/code/plugin/in_kube_nodes.rb
+++ b/source/code/plugin/in_kube_nodes.rb
@@ -58,81 +58,83 @@ def enumerate
         if (!nodeInventory.empty?)
           eventStream = MultiEventStream.new
           containerNodeInventoryEventStream = MultiEventStream.new
-          #get node inventory
-          nodeInventory["items"].each do |items|
-            record = {}
-            # Sending records for ContainerNodeInventory
-            containerNodeInventoryRecord = {}
-            containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-            containerNodeInventoryRecord["Computer"] = items["metadata"]["name"]
+          if !nodeInventory["items"].nil?
+            #get node inventory
+            nodeInventory["items"].each do |items|
+              record = {}
+              # Sending records for ContainerNodeInventory
+              containerNodeInventoryRecord = {}
+              containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+              containerNodeInventoryRecord["Computer"] = items["metadata"]["name"]
 
-            record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-            record["Computer"] = items["metadata"]["name"]
-            record["ClusterName"] = KubernetesApiClient.getClusterName
-            record["ClusterId"] = KubernetesApiClient.getClusterId
-            record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"]
-            record["Labels"] = [items["metadata"]["labels"]]
-            record["Status"] = ""
+              record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+              record["Computer"] = items["metadata"]["name"]
+              record["ClusterName"] = KubernetesApiClient.getClusterName
+              record["ClusterId"] = KubernetesApiClient.getClusterId
+              record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"]
+              record["Labels"] = [items["metadata"]["labels"]]
+              record["Status"] = ""
 
-            # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions.
-            # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we
-            # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk"
-            # implying that the node is ready for hosting pods, however its out of disk.
+              # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions.
+              # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we
+              # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk"
+              # implying that the node is ready for hosting pods, however its out of disk.
 
-            if items["status"].key?("conditions") && !items["status"]["conditions"].empty?
-              allNodeConditions = ""
-              items["status"]["conditions"].each do |condition|
-                if condition["status"] == "True"
-                  if !allNodeConditions.empty?
-                    allNodeConditions = allNodeConditions + "," + condition["type"]
-                  else
-                    allNodeConditions = condition["type"]
+              if items["status"].key?("conditions") && !items["status"]["conditions"].empty?
+                allNodeConditions = ""
+                items["status"]["conditions"].each do |condition|
+                  if condition["status"] == "True"
+                    if !allNodeConditions.empty?
+                      allNodeConditions = allNodeConditions + "," + condition["type"]
+                    else
+                      allNodeConditions = condition["type"]
+                    end
+                  end
+                  #collect last transition to/from ready (no matter ready is true/false)
+                  if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil?
+                    record["LastTransitionTimeReady"] = condition["lastTransitionTime"]
                   end
                 end
-                #collect last transition to/from ready (no matter ready is true/false)
-                if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil?
-                  record["LastTransitionTimeReady"] = condition["lastTransitionTime"]
+                if !allNodeConditions.empty?
+                  record["Status"] = allNodeConditions
                 end
               end
-              if !allNodeConditions.empty?
-                record["Status"] = allNodeConditions
-              end
-            end
 
-            nodeInfo = items["status"]["nodeInfo"]
-            record["KubeletVersion"] = nodeInfo["kubeletVersion"]
-            record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"]
-            containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"]
-            dockerVersion = nodeInfo["containerRuntimeVersion"]
-            dockerVersion.slice! "docker://"
-            containerNodeInventoryRecord["DockerVersion"] = dockerVersion
-            # ContainerNodeInventory data for docker version and operating system.
-            containerNodeInventoryWrapper = {
-              "DataType" => "CONTAINER_NODE_INVENTORY_BLOB",
-              "IPName" => "ContainerInsights",
-              "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }],
-            }
-            containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper
+              nodeInfo = items["status"]["nodeInfo"]
+              record["KubeletVersion"] = nodeInfo["kubeletVersion"]
+              record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"]
+              containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"]
+              dockerVersion = nodeInfo["containerRuntimeVersion"]
+              dockerVersion.slice! "docker://"
+              containerNodeInventoryRecord["DockerVersion"] = dockerVersion
+              # ContainerNodeInventory data for docker version and operating system.
+              containerNodeInventoryWrapper = {
+                "DataType" => "CONTAINER_NODE_INVENTORY_BLOB",
+                "IPName" => "ContainerInsights",
+                "DataItems" => [containerNodeInventoryRecord.each { |k, v| containerNodeInventoryRecord[k] = v }],
+              }
+              containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper
 
-            wrapper = {
-              "DataType" => "KUBE_NODE_INVENTORY_BLOB",
-              "IPName" => "ContainerInsights",
-              "DataItems" => [record.each { |k, v| record[k] = v }],
-            }
-            eventStream.add(emitTime, wrapper) if wrapper
-            # Adding telemetry to send node telemetry every 5 minutes
-            timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs
-            timeDifferenceInMinutes = timeDifference / 60
-            if (timeDifferenceInMinutes >= 5)
-              properties = {}
-              properties["Computer"] = record["Computer"]
-              properties["KubeletVersion"] = record["KubeletVersion"]
-              properties["OperatingSystem"] = nodeInfo["operatingSystem"]
-              properties["DockerVersion"] = dockerVersion
-              capacityInfo = items["status"]["capacity"]
-              ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties)
-              ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties)
-              telemetrySent = true
+              wrapper = {
+                "DataType" => "KUBE_NODE_INVENTORY_BLOB",
+                "IPName" => "ContainerInsights",
+                "DataItems" => [record.each { |k, v| record[k] = v }],
+              }
+              eventStream.add(emitTime, wrapper) if wrapper
+              # Adding telemetry to send node telemetry every 5 minutes
+              timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs
+              timeDifferenceInMinutes = timeDifference / 60
+              if (timeDifferenceInMinutes >= 5)
+                properties = {}
+                properties["Computer"] = record["Computer"]
+                properties["KubeletVersion"] = record["KubeletVersion"]
+                properties["OperatingSystem"] = nodeInfo["operatingSystem"]
+                properties["DockerVersion"] = dockerVersion
+                capacityInfo = items["status"]["capacity"]
+                ApplicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties)
+                ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties)
+                telemetrySent = true
+              end
             end
           end
           router.emit_stream(@tag, eventStream) if eventStream
diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index 79490ba7d..9c5fef1d7 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -152,8 +152,10 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar)
                 containerEnvArray.each do |envVarHash|
                   envName = envVarHash["name"]
                   envValue = envVarHash["value"]
-                  envArrayElement = envName + "=" + envValue
-                  envVarsArray.push(envArrayElement)
+                  if !envName.nil? && !envValue.nil?
+                    envArrayElement = envName + "=" + envValue
+                    envVarsArray.push(envArrayElement)
+                  end
                 end
               end
               # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE
@@ -201,7 +203,9 @@ def parse_and_emit_records(podInventory, serviceList)
             # instead of the actual poduid. Since this uid is not being surface into the UX
             # its ok to use this.
             # Use kubernetes.io/config.hash to be able to correlate with cadvisor data
-            podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"]
+            if !items["metadata"]["annotations"].nil?
+              podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"]
+            end
           else
             podUid = items["metadata"]["uid"]
           end
@@ -287,7 +291,9 @@ def parse_and_emit_records(podInventory, serviceList)
                 record["ContainerID"] = ""
               end
               #keeping this as <PodUid/container_name> which is same as InstanceName in perf table
-              record["ContainerName"] = podUid + "/" + container["name"]
+              if !podUid.nil? && !container["name"].nil?
+                record["ContainerName"] = podUid + "/" + container["name"]
+              end
               #Pod restart count is a sumtotal of restart counts of individual containers
               #within the pod. The restart count of a container is maintained by kubernetes
               #itself in the form of a container label.
diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb
index e1bb93f30..8b0a013e4 100644
--- a/source/code/plugin/in_kube_services.rb
+++ b/source/code/plugin/in_kube_services.rb
@@ -2,108 +2,101 @@
 # frozen_string_literal: true
 
 module Fluent
-    
-      class Kube_Services_Input < Input
-        Plugin.register_input('kubeservices', self)
-    
-        def initialize
-          super
-          require 'yaml'
-          require 'json'
-    
-          require_relative 'KubernetesApiClient'
-          require_relative 'oms_common'
-          require_relative 'omslog'
-          require_relative 'ApplicationInsightsUtility'
+  class Kube_Services_Input < Input
+    Plugin.register_input("kubeservices", self)
 
-        end
-    
-        config_param :run_interval, :time, :default => '1m'
-        config_param :tag, :string, :default => "oms.containerinsights.KubeServices"
-    
-        def configure (conf)
-          super
-        end
-    
-        def start
-          if @run_interval
-            @finished = false
-            @condition = ConditionVariable.new
-            @mutex = Mutex.new
-            @thread = Thread.new(&method(:run_periodic))
-          end
-        end
-    
-        def shutdown
-          if @run_interval
-            @mutex.synchronize {
-              @finished = true
-              @condition.signal
+    def initialize
+      super
+      require "yaml"
+      require "json"
+
+      require_relative "KubernetesApiClient"
+      require_relative "oms_common"
+      require_relative "omslog"
+      require_relative "ApplicationInsightsUtility"
+    end
+
+    config_param :run_interval, :time, :default => "1m"
+    config_param :tag, :string, :default => "oms.containerinsights.KubeServices"
+
+    def configure(conf)
+      super
+    end
+
+    def start
+      if @run_interval
+        @finished = false
+        @condition = ConditionVariable.new
+        @mutex = Mutex.new
+        @thread = Thread.new(&method(:run_periodic))
+      end
+    end
+
+    def shutdown
+      if @run_interval
+        @mutex.synchronize {
+          @finished = true
+          @condition.signal
+        }
+        @thread.join
+      end
+    end
+
+    def enumerate
+      currentTime = Time.now
+      emitTime = currentTime.to_f
+      batchTime = currentTime.utc.iso8601
+      $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
+      serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body)
+      $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
+      begin
+        if (!serviceList.empty?)
+          eventStream = MultiEventStream.new
+          serviceList["items"].each do |items|
+            record = {}
+            record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+            record["ServiceName"] = items["metadata"]["name"]
+            record["Namespace"] = items["metadata"]["namespace"]
+            record["SelectorLabels"] = [items["spec"]["selector"]]
+            record["ClusterId"] = KubernetesApiClient.getClusterId
+            record["ClusterName"] = KubernetesApiClient.getClusterName
+            record["ClusterIP"] = items["spec"]["clusterIP"]
+            record["ServiceType"] = items["spec"]["type"]
+            #<TODO> : Add ports and status fields
+            wrapper = {
+              "DataType" => "KUBE_SERVICES_BLOB",
+              "IPName" => "ContainerInsights",
+              "DataItems" => [record.each { |k, v| record[k] = v }],
             }
-            @thread.join
+            eventStream.add(emitTime, wrapper) if wrapper
           end
+          router.emit_stream(@tag, eventStream) if eventStream
         end
-    
-        def enumerate
-            currentTime = Time.now
-            emitTime = currentTime.to_f
-            batchTime = currentTime.utc.iso8601
-              $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
-              serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo('services').body)
-              $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}")
-              begin
-                if(!serviceList.empty?)
-                  eventStream = MultiEventStream.new
-                  serviceList['items'].each do |items|
-                    record = {}
-                    record['CollectionTime'] = batchTime #This is the time that is mapped to become TimeGenerated
-                    record['ServiceName'] = items['metadata']['name']
-                    record['Namespace'] = items['metadata']['namespace']
-                    record['SelectorLabels'] = [items['spec']['selector']]
-                    record['ClusterId'] = KubernetesApiClient.getClusterId
-                    record['ClusterName'] = KubernetesApiClient.getClusterName
-                    record['ClusterIP'] = items['spec']['clusterIP']
-                    record['ServiceType'] = items['spec']['type']
-                    #<TODO> : Add ports and status fields
-                    wrapper = {
-                      "DataType"=>"KUBE_SERVICES_BLOB",
-                      "IPName"=>"ContainerInsights",
-                      "DataItems"=>[record.each{|k,v| record[k]=v}]
-                    }
-                    eventStream.add(emitTime, wrapper) if wrapper  
-                  end
-                  router.emit_stream(@tag, eventStream) if eventStream
-                end  
-              rescue  => errorStr
-                $log.warn line.dump, error: errorStr.to_s
-                $log.debug_backtrace(e.backtrace)
-                ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-              end   
-        end
-    
-        def run_periodic
-          @mutex.lock
-          done = @finished
-          until done
-            @condition.wait(@mutex, @run_interval)
-            done = @finished
-            @mutex.unlock
-            if !done
-              begin
-                $log.info("in_kube_services::run_periodic @ #{Time.now.utc.iso8601}")
-                enumerate
-              rescue => errorStr
-                $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}"
-                ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-              end
-            end
-            @mutex.lock
+      rescue => errorStr
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+    end
+
+    def run_periodic
+      @mutex.lock
+      done = @finished
+      until done
+        @condition.wait(@mutex, @run_interval)
+        done = @finished
+        @mutex.unlock
+        if !done
+          begin
+            $log.info("in_kube_services::run_periodic @ #{Time.now.utc.iso8601}")
+            enumerate
+          rescue => errorStr
+            $log.warn "in_kube_services::run_periodic: enumerate Failed to kube services: #{errorStr}"
+            ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
           end
-          @mutex.unlock
         end
-    
-      end # Kube_Services_Input
-    
-    end # module
-    
-    
\ No newline at end of file
+        @mutex.lock
+      end
+      @mutex.unlock
+    end
+  end # Kube_Services_Input
+end # module

From 4f87b582f19083587ce34cea7af50108f6e2d105 Mon Sep 17 00:00:00 2001
From: rashmy <rashmy@RASHMY-PC2>
Date: Tue, 9 Jul 2019 13:30:56 -0700
Subject: [PATCH 10/12] tidy prometheus input comments in telegraf configs

---
 installer/conf/telegraf-rs.conf | 26 ++++++++++----------------
 installer/conf/telegraf.conf    | 11 -----------
 2 files changed, 10 insertions(+), 27 deletions(-)

diff --git a/installer/conf/telegraf-rs.conf b/installer/conf/telegraf-rs.conf
index 53aa03620..ce60bfa04 100644
--- a/installer/conf/telegraf-rs.conf
+++ b/installer/conf/telegraf-rs.conf
@@ -538,15 +538,20 @@
 
 #Prometheus Custom Metrics
 [[inputs.prometheus]]
-  #name_prefix="container.azm.ms/"
-  ## An array of urls to scrape metrics from.
-  #urls = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "http://$NODE_IP:10254/metrics", "http://$NODE_IP:9100/metrics"]
-  #fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]
   interval = "$AZMON_RS_PROM_INTERVAL"
+
   ## An array of urls to scrape metrics from.
   urls = $AZMON_RS_PROM_URLS
-
+
+  ## An array of Kubernetes services to scrape metrics from.
   kubernetes_services = $AZMON_RS_PROM_K8S_SERVICES
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
   monitor_kubernetes_pods = $AZMON_RS_PROM_MONITOR_PODS
 
   fieldpass = $AZMON_RS_PROM_FIELDPASS
@@ -555,20 +560,9 @@
   metric_version = 2
   url_tag = "scrapeUrl"
 
-  ## An array of Kubernetes services to scrape metrics from.
-  #kubernetes_services = ["https://kube-state-metrics.monitoring:8443/metrics","https://kube-state-metrics.monitoring:9443/metrics","http://oce-scc-template-nginx-ingress-controller.oce-nginx:10254/metrics"]
-
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"
 
-  ## Scrape Kubernetes pods for the following prometheus annotations:
-  ## - prometheus.io/scrape: Enable scraping for this pod
-  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
-  ##     set this to `https` & most likely set the tls config.
-  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
-  ## - prometheus.io/port: If port is not 9102 use this annotation
-  # monitor_kubernetes_pods = true
-
   ## Use bearer token for authorization. ('bearer_token' takes priority)
   bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
   ## OR
diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf
index 47e71c5f5..4883de81b 100644
--- a/installer/conf/telegraf.conf
+++ b/installer/conf/telegraf.conf
@@ -583,20 +583,9 @@
   metric_version = 2
   url_tag = "scrapeUrl"
 
-  ## An array of Kubernetes services to scrape metrics from.
-  #kubernetes_services = ["http://$NODE_IP:10255/metrics", "http://$NODE_IP:10255/metrics/cadvisor", "https://$NODE_IP:9100/metrics"]
-
   ## Kubernetes config file to create client from.
   # kube_config = "/path/to/kubernetes.config"
 
-  ## Scrape Kubernetes pods for the following prometheus annotations:
-  ## - prometheus.io/scrape: Enable scraping for this pod
-  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
-  ##     set this to `https` & most likely set the tls config.
-  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
-  ## - prometheus.io/port: If port is not 9102 use this annotation
-  # monitor_kubernetes_pods = true
-
   ## Use bearer token for authorization. ('bearer_token' takes priority)
   bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
   ## OR

From 574ad1cdc91a98610485bc6976a4bd470a8a91a9 Mon Sep 17 00:00:00 2001
From: rashmy <rashmy@RASHMY-PC2>
Date: Tue, 9 Jul 2019 15:09:44 -0700
Subject: [PATCH 11/12] changes for poduid nil check

---
 source/code/plugin/in_kube_podinventory.rb | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb
index 9c5fef1d7..d0056fb14 100644
--- a/source/code/plugin/in_kube_podinventory.rb
+++ b/source/code/plugin/in_kube_podinventory.rb
@@ -203,7 +203,9 @@ def parse_and_emit_records(podInventory, serviceList)
             # instead of the actual poduid. Since this uid is not being surface into the UX
             # its ok to use this.
             # Use kubernetes.io/config.hash to be able to correlate with cadvisor data
-            if !items["metadata"]["annotations"].nil?
+            if items["metadata"]["annotations"].nil?
+              next
+            else
               podUid = items["metadata"]["annotations"]["kubernetes.io/config.hash"]
             end
           else
@@ -291,7 +293,9 @@ def parse_and_emit_records(podInventory, serviceList)
                 record["ContainerID"] = ""
               end
               #keeping this as <PodUid/container_name> which is same as InstanceName in perf table
-              if !podUid.nil? && !container["name"].nil?
+              if podUid.nil? || container["name"].nil?
+                next
+              else
                 record["ContainerName"] = podUid + "/" + container["name"]
               end
               #Pod restart count is a sumtotal of restart counts of individual containers

From 5c8c3922935ae1c7a98f21576b4a0277a08a80c7 Mon Sep 17 00:00:00 2001
From: rashmy <rashmy@RASHMY-PC2>
Date: Wed, 10 Jul 2019 13:34:43 -0700
Subject: [PATCH 12/12] removing buffer chunk size and buffer max size from
 fluentbit conf

---
 installer/conf/td-agent-bit.conf | 2 --
 1 file changed, 2 deletions(-)

diff --git a/installer/conf/td-agent-bit.conf b/installer/conf/td-agent-bit.conf
index e7aabd242..ab79710c7 100644
--- a/installer/conf/td-agent-bit.conf
+++ b/installer/conf/td-agent-bit.conf
@@ -12,8 +12,6 @@
     DB.Sync Off
     Parser docker
     Mem_Buf_Limit 10m
-    Buffer_Chunk_Size 1m
-    Buffer_Max_Size 1m
     Rotate_Wait 20
     Refresh_Interval 30
     Path_Key filepath