From 75a27c72d3009a52d53a2c78866e48dc8761ea53 Mon Sep 17 00:00:00 2001
From: Rashmi Mysore Chandrashekar <rashmy@microsoft.com>
Date: Thu, 5 Nov 2020 19:25:30 -0800
Subject: [PATCH 1/4] mdm exception aggregation changes

---
 source/plugins/ruby/constants.rb | 111 ++++++++++++++++---------------
 source/plugins/ruby/out_mdm.rb   |  32 +++++++++
 2 files changed, 88 insertions(+), 55 deletions(-)

diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 0e5099c5e..f6ea41863 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -1,61 +1,61 @@
 # frozen_string_literal: true
 
 class Constants
-    INSIGHTSMETRICS_TAGS_ORIGIN = "container.azm.ms"
-    INSIGHTSMETRICS_TAGS_CLUSTERID = "container.azm.ms/clusterId"
-    INSIGHTSMETRICS_TAGS_CLUSTERNAME = "container.azm.ms/clusterName"
-    INSIGHTSMETRICS_TAGS_GPU_VENDOR = "gpuVendor"
-    INSIGHTSMETRICS_TAGS_GPU_NAMESPACE = "container.azm.ms/gpu"
-    INSIGHTSMETRICS_TAGS_GPU_MODEL = "gpuModel"
-    INSIGHTSMETRICS_TAGS_GPU_ID = "gpuId"
-    INSIGHTSMETRICS_TAGS_CONTAINER_NAME = "containerName"
-    INSIGHTSMETRICS_TAGS_CONTAINER_ID = "containerName"
-    INSIGHTSMETRICS_TAGS_K8SNAMESPACE = "k8sNamespace"
-    INSIGHTSMETRICS_TAGS_CONTROLLER_NAME = "controllerName"
-    INSIGHTSMETRICS_TAGS_CONTROLLER_KIND = "controllerKind"
-    INSIGHTSMETRICS_TAGS_POD_UID = "podUid"
-    INSIGTHTSMETRICS_TAGS_PV_NAMESPACE = "container.azm.ms/pv"
-    INSIGHTSMETRICS_TAGS_PVC_NAME = "pvcName"
-    INSIGHTSMETRICS_TAGS_PVC_NAMESPACE = "pvcNamespace"
-    INSIGHTSMETRICS_TAGS_POD_NAME = "podName"
-    INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES = "pvCapacityBytes"
-    INSIGHTSMETRICS_TAGS_VOLUME_NAME = "volumeName"
-    INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics"
-    REASON_OOM_KILLED = "oomkilled"
-    #Kubestate (common)
-    INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE = "container.azm.ms/kubestate"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME = "creationTime"
-    #Kubestate (deployments)
-    INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE = "kube_deployment_status_replicas_ready"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME = "deployment"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_CREATIONTIME = "creationTime"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY = "deploymentStrategy"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS = "spec_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED = "status_replicas_updated"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE = "status_replicas_available"
-    #Kubestate (HPA)
-    INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE = "kube_hpa_status_current_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME = "hpa"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS = "spec_max_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS = "spec_min_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND = "targetKind"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME = "targetName"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS = "status_desired_replicas"
-    
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME = "lastScaleTime"
-    # MDM Metric names
-    MDM_OOM_KILLED_CONTAINER_COUNT = "oomKilledContainerCount"
-    MDM_CONTAINER_RESTART_COUNT = "restartingContainerCount"
-    MDM_POD_READY_PERCENTAGE = "podReadyPercentage"
-    MDM_STALE_COMPLETED_JOB_COUNT = "completedJobsCount"
-    MDM_DISK_USED_PERCENTAGE = "diskUsedPercentage"
-    MDM_CONTAINER_CPU_UTILIZATION_METRIC = "cpuExceededPercentage"
-    MDM_CONTAINER_MEMORY_RSS_UTILIZATION_METRIC = "memoryRssExceededPercentage"
-    MDM_CONTAINER_MEMORY_WORKING_SET_UTILIZATION_METRIC = "memoryWorkingSetExceededPercentage"
-    MDM_PV_UTILIZATION_METRIC = "pvUsageExceededPercentage"
-    MDM_NODE_CPU_USAGE_PERCENTAGE = "cpuUsagePercentage"
-    MDM_NODE_MEMORY_RSS_PERCENTAGE = "memoryRssPercentage"
-    MDM_NODE_MEMORY_WORKING_SET_PERCENTAGE = "memoryWorkingSetPercentage"
+  INSIGHTSMETRICS_TAGS_ORIGIN = "container.azm.ms"
+  INSIGHTSMETRICS_TAGS_CLUSTERID = "container.azm.ms/clusterId"
+  INSIGHTSMETRICS_TAGS_CLUSTERNAME = "container.azm.ms/clusterName"
+  INSIGHTSMETRICS_TAGS_GPU_VENDOR = "gpuVendor"
+  INSIGHTSMETRICS_TAGS_GPU_NAMESPACE = "container.azm.ms/gpu"
+  INSIGHTSMETRICS_TAGS_GPU_MODEL = "gpuModel"
+  INSIGHTSMETRICS_TAGS_GPU_ID = "gpuId"
+  INSIGHTSMETRICS_TAGS_CONTAINER_NAME = "containerName"
+  INSIGHTSMETRICS_TAGS_CONTAINER_ID = "containerName"
+  INSIGHTSMETRICS_TAGS_K8SNAMESPACE = "k8sNamespace"
+  INSIGHTSMETRICS_TAGS_CONTROLLER_NAME = "controllerName"
+  INSIGHTSMETRICS_TAGS_CONTROLLER_KIND = "controllerKind"
+  INSIGHTSMETRICS_TAGS_POD_UID = "podUid"
+  INSIGTHTSMETRICS_TAGS_PV_NAMESPACE = "container.azm.ms/pv"
+  INSIGHTSMETRICS_TAGS_PVC_NAME = "pvcName"
+  INSIGHTSMETRICS_TAGS_PVC_NAMESPACE = "pvcNamespace"
+  INSIGHTSMETRICS_TAGS_POD_NAME = "podName"
+  INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES = "pvCapacityBytes"
+  INSIGHTSMETRICS_TAGS_VOLUME_NAME = "volumeName"
+  INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics"
+  REASON_OOM_KILLED = "oomkilled"
+  #Kubestate (common)
+  INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE = "container.azm.ms/kubestate"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME = "creationTime"
+  #Kubestate (deployments)
+  INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE = "kube_deployment_status_replicas_ready"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME = "deployment"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_CREATIONTIME = "creationTime"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY = "deploymentStrategy"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS = "spec_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED = "status_replicas_updated"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE = "status_replicas_available"
+  #Kubestate (HPA)
+  INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE = "kube_hpa_status_current_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME = "hpa"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS = "spec_max_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS = "spec_min_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND = "targetKind"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME = "targetName"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS = "status_desired_replicas"
+
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME = "lastScaleTime"
+  # MDM Metric names
+  MDM_OOM_KILLED_CONTAINER_COUNT = "oomKilledContainerCount"
+  MDM_CONTAINER_RESTART_COUNT = "restartingContainerCount"
+  MDM_POD_READY_PERCENTAGE = "podReadyPercentage"
+  MDM_STALE_COMPLETED_JOB_COUNT = "completedJobsCount"
+  MDM_DISK_USED_PERCENTAGE = "diskUsedPercentage"
+  MDM_CONTAINER_CPU_UTILIZATION_METRIC = "cpuExceededPercentage"
+  MDM_CONTAINER_MEMORY_RSS_UTILIZATION_METRIC = "memoryRssExceededPercentage"
+  MDM_CONTAINER_MEMORY_WORKING_SET_UTILIZATION_METRIC = "memoryWorkingSetExceededPercentage"
+  MDM_PV_UTILIZATION_METRIC = "pvUsageExceededPercentage"
+  MDM_NODE_CPU_USAGE_PERCENTAGE = "cpuUsagePercentage"
+  MDM_NODE_MEMORY_RSS_PERCENTAGE = "memoryRssPercentage"
+  MDM_NODE_MEMORY_WORKING_SET_PERCENTAGE = "memoryWorkingSetPercentage"
 
   CONTAINER_TERMINATED_RECENTLY_IN_MINUTES = 5
   OBJECT_NAME_K8S_CONTAINER = "K8SContainer"
@@ -88,6 +88,7 @@ class Constants
   KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
   ZERO_FILL_METRICS_INTERVAL_IN_MINUTES = 30
   MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
+  MDM_EXCEPTION_TELEMETRY_METRIC = "AKSCustomMetricsMdmExceptions"
 
   #Pod Statuses
   POD_STATUS_TERMINATING = "Terminating"
diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index 1c805255a..328281c5a 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -50,6 +50,10 @@ def initialize
       @cluster_identity = nil
       @isArcK8sCluster = false
       @get_access_token_backoff_expiry = Time.now
+
+      @mdm_server_exceptions_hash = {}
+      @mdm_server_exception_count = 0
+      @@mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
     end
 
     def configure(conf)
@@ -221,6 +225,20 @@ def format(tag, time, record)
       end
     end
 
+    def exception_aggregator(response_code)
+      begin
+        if (@mdm_exceptions_hash[response_code].nil?)
+          @mdm_exceptions_hash[response_code] = 1
+        else
+          @mdm_exceptions_hash[response_code] += 1
+        end
+        @mdm_server_exception_count += 1
+      rescue => error
+        @log.info "Error in MDM exception_aggregator method: #{error}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(error)
+      end
+    end
+
     # This method is called every flush interval. Send the buffer chunk to MDM.
     # 'chunk' is a buffer chunk that includes multiple formatted records
     def write(chunk)
@@ -246,6 +264,16 @@ def write(chunk)
             @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time) / 60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP"
           end
         end
+        timeDifference = (DateTime.now.to_time.to_i - @@mdm_exception_telemetry_time_tracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= 60)
+          telemetryProperties = {}
+          telemetryProperties["ExceptionsCountForLastHour"] = @mdm_exceptions_hash.to_json
+          ApplicationInsightsUtility.sendMetricTelemetry(Constants::MDM_EXCEPTION_TELEMETRY_METRIC, @mdm_server_exception_count, telemetryProperties)
+          # Resetting values after flushing
+          @mdm_server_exception_count = 0
+          @mdm_exceptions_hash = {}
+        end
       rescue Exception => e
         ApplicationInsightsUtility.sendExceptionTelemetry(e)
         @log.info "Exception when writing to MDM: #{e}"
@@ -295,8 +323,12 @@ def send_to_mdm(post_body)
         else
           # raise if the response code is non-400
           @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}"
+          # Adding server exceptions to hash to aggregate and send telemetry
+          exception_aggregator(response.code)
           raise e
         end
+        # Adding exceptions to hash to aggregate and send telemetry
+        exception_aggregator(response.code)
       rescue Errno::ETIMEDOUT => e
         @log.info "Timed out when POSTing Metrics to MDM : #{e} Response: #{response}"
         @log.debug_backtrace(e.backtrace)

From 9966e7ffed23e933773f245e4883ad98d32f1d8b Mon Sep 17 00:00:00 2001
From: Rashmi Mysore Chandrashekar <rashmy@microsoft.com>
Date: Mon, 9 Nov 2020 16:11:29 -0800
Subject: [PATCH 2/4] changes for mdm exception aggregator

---
 source/plugins/ruby/constants.rb |  1 +
 source/plugins/ruby/out_mdm.rb   | 36 ++++++++++++++++----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index f6ea41863..079584c7b 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -89,6 +89,7 @@ class Constants
   ZERO_FILL_METRICS_INTERVAL_IN_MINUTES = 30
   MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
   MDM_EXCEPTION_TELEMETRY_METRIC = "AKSCustomMetricsMdmExceptions"
+  MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL = 30
 
   #Pod Statuses
   POD_STATUS_TERMINATING = "Terminating"
diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index 328281c5a..7ee2eb1ff 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -52,7 +52,7 @@ def initialize
       @get_access_token_backoff_expiry = Time.now
 
       @mdm_server_exceptions_hash = {}
-      @mdm_server_exception_count = 0
+      @mdm_exceptions_count = 0
       @@mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
     end
 
@@ -225,14 +225,16 @@ def format(tag, time, record)
       end
     end
 
-    def exception_aggregator(response_code)
+    def exception_aggregator(error)
       begin
-        if (@mdm_exceptions_hash[response_code].nil?)
-          @mdm_exceptions_hash[response_code] = 1
+        errorStr = error.to_s
+        if (@mdm_exceptions_hash[errorStr].nil?)
+          @mdm_exceptions_hash[errorStr] = 1
         else
-          @mdm_exceptions_hash[response_code] += 1
+          @mdm_exceptions_hash[errorStr] += 1
         end
-        @mdm_server_exception_count += 1
+        #Keeping track of all exceptions to send the total in the last flush interval as a metric
+        @mdm_exceptions_count += 1
       rescue => error
         @log.info "Error in MDM exception_aggregator method: #{error}"
         ApplicationInsightsUtility.sendExceptionTelemetry(error)
@@ -264,18 +266,21 @@ def write(chunk)
             @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time) / 60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP"
           end
         end
+        #Flush out exception telemetry as a metric for the last 30 minutes
         timeDifference = (DateTime.now.to_time.to_i - @@mdm_exception_telemetry_time_tracker).abs
         timeDifferenceInMinutes = timeDifference / 60
-        if (timeDifferenceInMinutes >= 60)
+        if (timeDifferenceInMinutes >= Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL)
           telemetryProperties = {}
-          telemetryProperties["ExceptionsCountForLastHour"] = @mdm_exceptions_hash.to_json
-          ApplicationInsightsUtility.sendMetricTelemetry(Constants::MDM_EXCEPTION_TELEMETRY_METRIC, @mdm_server_exception_count, telemetryProperties)
+          telemetryProperties["ExceptionsHashForFlushInterval"] = @mdm_exceptions_hash.to_json
+          telemetryProperties["FlushInterval"] = Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL
+          ApplicationInsightsUtility.sendMetricTelemetry(Constants::MDM_EXCEPTION_TELEMETRY_METRIC, @mdm_exceptions_count, telemetryProperties)
           # Resetting values after flushing
-          @mdm_server_exception_count = 0
+          @mdm_exceptions_count = 0
           @mdm_exceptions_hash = {}
         end
       rescue Exception => e
-        ApplicationInsightsUtility.sendExceptionTelemetry(e)
+        # Adding exceptions to hash to aggregate and send telemetry for all write errors
+        exception_aggregator(e)
         @log.info "Exception when writing to MDM: #{e}"
         raise e
       end
@@ -310,7 +315,6 @@ def send_to_mdm(post_body)
         else
           @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}"
         end
-        #@log.info "MDM request : #{post_body}"
         @log.debug_backtrace(e.backtrace)
         if !response.code.empty? && response.code == 403.to_s
           @log.info "Response Code #{response.code} Updating @last_post_attempt_time"
@@ -323,21 +327,17 @@ def send_to_mdm(post_body)
         else
           # raise if the response code is non-400
           @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}"
-          # Adding server exceptions to hash to aggregate and send telemetry
-          exception_aggregator(response.code)
           raise e
         end
-        # Adding exceptions to hash to aggregate and send telemetry
-        exception_aggregator(response.code)
+        # Adding exceptions to hash to aggregate and send telemetry for all 400 error codes
+        exception_aggregator(e)
       rescue Errno::ETIMEDOUT => e
         @log.info "Timed out when POSTing Metrics to MDM : #{e} Response: #{response}"
         @log.debug_backtrace(e.backtrace)
-        ApplicationInsightsUtility.sendExceptionTelemetry(e)
         raise e
       rescue Exception => e
         @log.info "Exception POSTing Metrics to MDM : #{e} Response: #{response}"
         @log.debug_backtrace(e.backtrace)
-        ApplicationInsightsUtility.sendExceptionTelemetry(e)
         raise e
       end
     end

From 94023d27c6382ba2c2b3b2650ac55fcc5fe1766f Mon Sep 17 00:00:00 2001
From: Rashmi Mysore Chandrashekar <rashmy@microsoft.com>
Date: Mon, 9 Nov 2020 16:20:50 -0800
Subject: [PATCH 3/4] move mdm exception telemetry flush to the start of write

---
 source/plugins/ruby/out_mdm.rb | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index 7ee2eb1ff..546fbea97 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -241,10 +241,32 @@ def exception_aggregator(error)
       end
     end
 
+    def flush_mdm_exception_telemetry
+      begin
+        #Flush out exception telemetry as a metric for the last 30 minutes
+        timeDifference = (DateTime.now.to_time.to_i - @@mdm_exception_telemetry_time_tracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL)
+          telemetryProperties = {}
+          telemetryProperties["ExceptionsHashForFlushInterval"] = @mdm_exceptions_hash.to_json
+          telemetryProperties["FlushInterval"] = Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL
+          ApplicationInsightsUtility.sendMetricTelemetry(Constants::MDM_EXCEPTION_TELEMETRY_METRIC, @mdm_exceptions_count, telemetryProperties)
+          # Resetting values after flushing
+          @mdm_exceptions_count = 0
+          @mdm_exceptions_hash = {}
+        end
+      rescue => error
+        @log.info "Error in flush_mdm_exception_telemetry method: #{error}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(error)
+      end
+    end
+
     # This method is called every flush interval. Send the buffer chunk to MDM.
     # 'chunk' is a buffer chunk that includes multiple formatted records
     def write(chunk)
       begin
+        # Flush aggregated exception telemetry first: if this ran after the post attempt, it would be skipped whenever that attempt raises and might never be sent
+        flush_mdm_exception_telemetry
         if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes * 60)) && @can_send_data_to_mdm
           post_body = []
           chunk.msgpack_each { |(tag, record)|
@@ -266,18 +288,6 @@ def write(chunk)
             @log.info "Last Failed POST attempt to MDM was made #{((Time.now - @last_post_attempt_time) / 60).round(1)} min ago. This is less than the current retry threshold of #{@retry_mdm_post_wait_minutes} min. NO-OP"
           end
         end
-        #Flush out exception telemetry as a metric for the last 30 minutes
-        timeDifference = (DateTime.now.to_time.to_i - @@mdm_exception_telemetry_time_tracker).abs
-        timeDifferenceInMinutes = timeDifference / 60
-        if (timeDifferenceInMinutes >= Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL)
-          telemetryProperties = {}
-          telemetryProperties["ExceptionsHashForFlushInterval"] = @mdm_exceptions_hash.to_json
-          telemetryProperties["FlushInterval"] = Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL
-          ApplicationInsightsUtility.sendMetricTelemetry(Constants::MDM_EXCEPTION_TELEMETRY_METRIC, @mdm_exceptions_count, telemetryProperties)
-          # Resetting values after flushing
-          @mdm_exceptions_count = 0
-          @mdm_exceptions_hash = {}
-        end
       rescue Exception => e
         # Adding exceptions to hash to aggregate and send telemetry for all write errors
         exception_aggregator(e)

From 3cd6f5ee7b652fc610a69c0a25c4d94feee3b184 Mon Sep 17 00:00:00 2001
From: Rashmi Mysore Chandrashekar <rashmy@microsoft.com>
Date: Mon, 9 Nov 2020 17:27:07 -0800
Subject: [PATCH 4/4] fix mdm exceptions hash init and reset flush time tracker

---
 source/plugins/ruby/out_mdm.rb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index 546fbea97..6238eb51a 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -51,9 +51,9 @@ def initialize
       @isArcK8sCluster = false
       @get_access_token_backoff_expiry = Time.now
 
-      @mdm_server_exceptions_hash = {}
+      @mdm_exceptions_hash = {}
       @mdm_exceptions_count = 0
-      @@mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
+      @mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
     end
 
     def configure(conf)
@@ -244,7 +244,7 @@ def exception_aggregator(error)
     def flush_mdm_exception_telemetry
       begin
         #Flush out exception telemetry as a metric for the last 30 minutes
-        timeDifference = (DateTime.now.to_time.to_i - @@mdm_exception_telemetry_time_tracker).abs
+        timeDifference = (DateTime.now.to_time.to_i - @mdm_exception_telemetry_time_tracker).abs
         timeDifferenceInMinutes = timeDifference / 60
         if (timeDifferenceInMinutes >= Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL)
           telemetryProperties = {}
@@ -254,6 +254,7 @@ def flush_mdm_exception_telemetry
           # Resetting values after flushing
           @mdm_exceptions_count = 0
           @mdm_exceptions_hash = {}
+          @mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
         end
       rescue => error
         @log.info "Error in flush_mdm_exception_telemetry method: #{error}"