From eb2c5f34e6e4bf460e1443670cb7aacff1bb30b2 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Tue, 4 Aug 2020 17:00:24 -0700
Subject: [PATCH 01/60] separate build yamls for ci_prod branch (#415)

---
 ...l.all_tag.all_phase.all_config.ci_prod.yml | 44 +++++++++++++++
 ...l.all_tag.all_phase.all_config.ci_prod.yml | 55 +++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 .pipelines/pipeline.user.linux.official.all_tag.all_phase.all_config.ci_prod.yml
 create mode 100644 .pipelines/pipeline.user.windows.official.all_tag.all_phase.all_config.ci_prod.yml

diff --git a/.pipelines/pipeline.user.linux.official.all_tag.all_phase.all_config.ci_prod.yml b/.pipelines/pipeline.user.linux.official.all_tag.all_phase.all_config.ci_prod.yml
new file mode 100644
index 000000000..d47a60ffe
--- /dev/null
+++ b/.pipelines/pipeline.user.linux.official.all_tag.all_phase.all_config.ci_prod.yml
@@ -0,0 +1,44 @@
+environment:
+  host:
+    os: 'linux'
+    flavor: 'ubuntu'
+    version: '16.04'
+  runtime:
+    provider: 'appcontainer'
+    image: 'cdpxlinux.azurecr.io/user/azure-monitor/container-insights:1.0'
+
+version:
+  name: 'DockerProvider'
+  major: 10
+  minor: 0
+  tag: 'beta'
+  system: 'custom'
+  exclude_commit: true
+
+restore:
+  commands:
+    - !!defaultcommand
+      name: 'get go modules'
+      command: '.pipelines/restore-linux.sh'
+      fail_on_stderr: false
+
+build:
+  commands:
+    - !!defaultcommand
+      name: 'Build Docker Provider Shell Bundle'
+      command: '.pipelines/build-linux.sh'
+      fail_on_stderr: false
+
+package:
+  commands:
+    - !!dockerbuildcommand                               # REQUIRED: This maps the command data to a concrete type in the CDPX orchestrator.
+      name: 'Build Docker Image'                          # REQUIRED: All commands have a name field. All console output captured when
+                                                          #           this command runs is tagged with the value of this field.
+      context_folder: 'kubernetes/linux'                  # REQUIRED: The repository root relative path of the folder containing the Dockerfile to build.
+                                                          # In effect, the context folder will be repository_checkout_folder/kubernetes/linux.
+      dockerfile_name: 'Dockerfile'                       # OPTIONAL: The name of the dockerfile. Docker client does allow the Dockerfile
+                                                          # to be named differently. Defaults to Dockerfile.
+                                                          # In effect, the -f option value passed to docker build will be repository_checkout_folder/kubernetes/linux/Dockerfile.
+      repository_name: 'cdpxlinux'                        # only supported ones are cdpx acr repos
+      tag: 'ciprod'                                        # OPTIONAL: Defaults to latest. The tag for the built image. Final tag will be 1.0.0alpha, 1.0.0-timestamp-commitID.
+      latest: false                                       # OPTIONAL: Defaults to false. If tag is not set to latest and this flag is set, then tag as latest as well and push latest as well.
diff --git a/.pipelines/pipeline.user.windows.official.all_tag.all_phase.all_config.ci_prod.yml b/.pipelines/pipeline.user.windows.official.all_tag.all_phase.all_config.ci_prod.yml
new file mode 100644
index 000000000..e0286fbd6
--- /dev/null
+++ b/.pipelines/pipeline.user.windows.official.all_tag.all_phase.all_config.ci_prod.yml
@@ -0,0 +1,55 @@
+environment:
+  host:
+    os: 'windows'
+    flavor: 'server'
+    version: '2019'
+  runtime:
+    provider: 'appcontainer'
+    image: 'cdpxwin1809.azurecr.io/user/azure-monitor/container-insights:6.0'
+    source_mode: 'map'
+
+version:
+  name: 'Certificate Generator and Out OMS plugin'
+  major: 10
+  minor: 0
+  tag: 'beta'
+  system: 'custom'
+  exclude_commit: true
+
+signing_options:
+  profile: 'azure'
+  codesign_validation_glob_pattern: 'regex|.+(?:dll|exe|sys|ps1|psm1|ps1xml|psc1|psd1|cdxml|vbs|js|wsf)$;-:file|**\linux\**' #CSV does not currently support binaries built for linux, so we exclude this folder
+
+static_analysis_options:
+  binskim_options:
+    files_to_scan:
+      - from: 'build\windows\installer\certificategenerator\bin\'
+        exclude: # exclude binaries which are referenced via dotnet packages and not built by us
+          - '**/**/**/BouncyCastle.Crypto.dll'
+          - '**/**/**/**/BouncyCastle.Crypto.dll'
+restore:
+  commands:
+    - !!defaultcommand
+      name: 'Restore dotnet packages'                     # console output captured while this command runs is tagged with this name
+      command: '.pipelines/restore-windows.cmd'           # script invoked for this step (path presumably relative to the repository root - confirm)
+
+build:
+  commands:
+    - !!defaultcommand
+      name: 'Build Certificate Generator Source code and Out OMS Go plugin code'
+      command: '.pipelines/build-windows.cmd'
+      fail_on_stderr: false
+
+package:
+  commands:
+    - !!dockerbuildcommand                                # REQUIRED: This maps the command data to a concrete type in the CDPX orchestrator.
+      name: 'Build Docker Image'                          # REQUIRED: All commands have a name field. All console output captured when
+                                                          #           this command runs is tagged with the value of this field.
+      context_folder: 'kubernetes/windows'                # REQUIRED: The repository root relative path of the folder containing the Dockerfile to build.
+                                                          # In effect, the context folder will be repository_checkout_folder/kubernetes/windows.
+      dockerfile_name: 'Dockerfile'                       # OPTIONAL: The name of the dockerfile. Docker client does allow the Dockerfile
+                                                          # to be named differently. Defaults to Dockerfile.
+                                                          # In effect, the -f option value passed to docker build will be repository_checkout_folder/kubernetes/windows/Dockerfile.
+      repository_name: 'cdpxwin1809'                      # only supported ones are cdpx acr repos
+      tag: 'win-ciprod'                                    # OPTIONAL: Defaults to latest. The tag for the built image. Final tag will be 1.0.0alpha, 1.0.0-timestamp-commitID.
+      latest: false                                       # OPTIONAL: Defaults to false. If tag is not set to latest and this flag is set, then tag as latest as well and push latest as well.

From df29e35c0b5d5a4bf73bb833f9939bda40ee0732 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Wed, 5 Aug 2020 17:45:14 -0700
Subject: [PATCH 02/60] re-enable adx path (#420)

---
 source/plugins/go/src/oms.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go
index 88c5641f7..63ca6de10 100644
--- a/source/plugins/go/src/oms.go
+++ b/source/plugins/go/src/oms.go
@@ -1323,9 +1323,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 		ContainerLogsRouteV2 = true
 		Log("Routing container logs thru %s route...", ContainerLogsV2Route)
 		fmt.Fprintf(os.Stdout, "Routing container logs thru %s route... \n", ContainerLogsV2Route)
-	//} else if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 {
-	//making dormant with below comparison for now --
-	} else if strings.Compare("willnot", "match") == 0 {
+	} else if strings.Compare(ContainerLogsRoute, ContainerLogsADXRoute) == 0 {
 		//check if adx clusteruri, clientid & secret are set
 		var err error
 		AdxClusterUri, err = ReadFileContents(PluginConfiguration["adx_cluster_uri_path"])

From bcc8506e4d4a1114307d3d13ad09111ada9c367e Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 5 Aug 2020 18:17:12 -0700
Subject: [PATCH 03/60] Gangams/release changes (#419)

* updates related to release

* updates related to release

* fix the incorrect version

* fix pr feedback

* fix some typos in the release notes
---
 README.md                                  |  4 ++--
 ReleaseNotes.md                            | 16 ++++++++++++++++
 ReleaseProcess.md                          | 16 +++++++++++-----
 build/version                              |  4 ++--
 charts/azuremonitor-containers/Chart.yaml  |  2 +-
 charts/azuremonitor-containers/values.yaml |  6 +++---
 kubernetes/linux/Dockerfile                |  2 +-
 kubernetes/omsagent.yaml                   | 12 ++++++------
 kubernetes/windows/Dockerfile              |  2 +-
 9 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 06d3606c0..659fe0161 100644
--- a/README.md
+++ b/README.md
@@ -213,7 +213,7 @@ powershell -ExecutionPolicy bypass  # switch to powershell if you are not on pow
 
 # Azure DevOps Build Pipeline
 
-Navigate to https://github-private.visualstudio.com/microsoft/_build?view=pipelines to see Linux and Windows Agent build pipelines. These pipelines are configured with CI triggers for ci_dev and ci_prod (TBD).
+Navigate to https://github-private.visualstudio.com/microsoft/_build?view=pipelines to see Linux and Windows Agent build pipelines. These pipelines are configured with CI triggers for ci_dev and ci_prod.
 
 Docker Images will be pushed to CDPX ACR repos and these needs to retagged and pushed to corresponding ACR or docker hub. Only onboarded Azure AD AppId has permission to pull the images from CDPx ACRs.
 
@@ -236,7 +236,7 @@ Here are the instructions to onboard the feature branch to Azure Dev Ops pipelin
 
 # Azure DevOps Release Pipeline
 
-Integrated to Azure DevOps release pipeline for the ci_dev and ci_prod (TBD).With this, for every commit to ci_dev branch, latest bits automatically deployded to DEV AKS clusters in Build subscription and similarly for for every commit to ci_prod branch, latest bits automatically deployed to PROD AKS clusters in Build subscription.
+Integrated to Azure DevOps release pipeline for the ci_dev and ci_prod. With this, for every commit to ci_dev branch, latest bits are automatically deployed to DEV AKS clusters in Build subscription and similarly for every commit to ci_prod branch, latest bits are automatically deployed to PROD AKS clusters in Build subscription.
 
 For dev, agent image will be in this format mcr.microsoft.com/azuremonitor/containerinsights/cidev:cidev<git-commit-id>.
 For prod, agent will be in this format mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod`<MM><DD><YYYY>`.
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index aa57d8388..933900b89 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -11,6 +11,22 @@ additional questions or comments.
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
 
+### 08/05/2020 -
+##### Version microsoft/oms:ciprod08052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08052020 (linux)
+##### Version microsoft/oms:win-ciprod08052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08052020 (windows)
+##### Code change log
+- Collection of KubeState metrics for deployments and HPA
+- Add the Proxy support for Windows agent
+- Fix for ContainerState in ContainerInventory to handle Failed state and collection of environment variables for terminated and failed containers
+- Change /spec to /metrics/cadvisor endpoint to collect node capacity metrics
+- Disable Health Plugin by default; it can be enabled via configmap
+- Pin version of jq to 1.5+dfsg-2
+- Bug fix for showing node as 'not ready' when there is disk pressure
+- oneagent integration (disabled by default)
+- Add region check before sending alertable metrics to MDM
+- Telemetry fix for agent telemetry for sov. clouds
+
+
 ### 07/15/2020 -
 ##### Version microsoft/oms:ciprod07152020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07152020 (linux)
 ##### Version microsoft/oms:win-ciprod05262020-2 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod05262020-2 (windows)
diff --git a/ReleaseProcess.md b/ReleaseProcess.md
index 38ff1ab69..5ec42d496 100644
--- a/ReleaseProcess.md
+++ b/ReleaseProcess.md
@@ -5,15 +5,21 @@
 Here are the high-level instructions to get the CIPROD`<MM><DD><YYYY>` image for the production release
 1. create feature branch from ci_dev and make the following updates
       > Note: This required since Azure Dev Ops pipeline doesnt support --build-arg yet to automate this.
-   -  Ensure IMAGE_TAG updated with release candiate image tag in the DockerFile under kubernetes/linux and kubernetes/windows directory
-   - Update omsagent.yaml if there are any changes to the yaml
+   - Ensure IMAGE_TAG is updated with the release candidate image tag in the Dockerfile under the kubernetes/linux and kubernetes/windows directories
+   - Update the version file under build directory with build version and date
+   - Update omsagent.yaml for the image tag and dockerProviderVersion, and any other changes
+   - Update the chart version and image tags in values.yaml under charts/azuremonitor-containers
    - Release notes
 2. Make PR to ci_dev branch and once the PR approved, merge the changes to ci_dev
 3. Latest bits of ci_dev automatically deployed to CIDEV cluster in build subscription so just validated E2E to make sure everthing works
 4. If everything validated in DEV, make merge PR from ci_dev and ci_prod and merge once this reviewed by dev team
-5. Merge ci_dev and ci_prod branch which will trigger automatic deployment of latest bits to CIPROD cluster with CIPROD`<MM><DD><YYYY>` image (TBD)
+6. Update following pipeline variables under ReleaseCandiate with version of chart and image tag
+    - CIHELMCHARTVERSION <VersionValue> # For example, 2.7.4
+    - CIImageTagSuffix <ImageTag> # ciprod08052020 or ciprod08052020-1 etc.
+7. Merge ci_dev and ci_prod branch which will trigger automatic deployment of latest bits to CIPROD cluster with CIPROD`<MM><DD><YYYY>` image to test and scale cluters, AKS, AKS-Engine
    > Note: production image automatically pushed to CIPROD Public cloud ACR which will inturn replicated to Public cloud MCR.
-6. Validate all the scenarios against CIPROD cluster in Build subscription
+8. Validate all the scenarios against clusters in build subscription and scale clusters
+
 
 # 2. Perf and scale testing
 
@@ -27,7 +33,7 @@ Image automatically synched to MCR CN from Public cloud MCR.
 
 ## AKS
 
-Make PR against [AKS-RP](https://msazure.visualstudio.com/CloudNativeCompute/_git/aks-rp?version=GBmaster) repo with chart update(s)
+- Refer to internal docs for the release process and instructions.
 
 ## ARO v3
 
diff --git a/build/version b/build/version
index b856fc312..f26973116 100644
--- a/build/version
+++ b/build/version
@@ -5,8 +5,8 @@
 CONTAINER_BUILDVERSION_MAJOR=10
 CONTAINER_BUILDVERSION_MINOR=0
 CONTAINER_BUILDVERSION_PATCH=0
-CONTAINER_BUILDVERSION_BUILDNR=1
-CONTAINER_BUILDVERSION_DATE=20200526
+CONTAINER_BUILDVERSION_BUILDNR=4
+CONTAINER_BUILDVERSION_DATE=20200805
 CONTAINER_BUILDVERSION_STATUS=Developer_Build
 
 #-------------------------------- End of File -----------------------------------
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index 8a84692e7..202494152 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 appVersion: 7.0.0-1
 description: Helm chart for deploying Azure Monitor container monitoring agent in Kubernetes
 name: azuremonitor-containers
-version: 2.7.3
+version: 2.7.4
 kubeVersion: "^1.10.0-0"
 keywords:
   - monitoring
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 685c767bb..927d24b35 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -7,10 +7,10 @@
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod07152020"
-    tagWindows: "win-ciprod05262020-2"
+    tag: "ciprod08052020"
+    tagWindows: "win-ciprod08052020"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "10.0.0-3"
+    dockerProviderVersion: "10.0.0-4"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index c8b61995d..c82532471 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod07152020
+ARG IMAGE_TAG=ciprod08052020
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 038c7e92b..ac712722a 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -337,13 +337,13 @@ spec:
         tier: node
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-3"
+        dockerProviderVersion: "10.0.0-4"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07152020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08052020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -480,13 +480,13 @@ spec:
         rsName: "omsagent-rs"
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-3"
+        dockerProviderVersion: "10.0.0-4"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07152020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08052020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -631,13 +631,13 @@ spec:
         tier: node-win
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-2"
+        dockerProviderVersion: "10.0.0-4"
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod05262020-2"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08052020"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index 9a5e22e0d..0b81b9c71 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod05262020-2
+ARG IMAGE_TAG=win-ciprod08052020
 
 SHELL ["powershell"]
 

From 39534d6116ca5df1325768e681646b5d6010ea6b Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Wed, 5 Aug 2020 20:06:46 -0700
Subject: [PATCH 04/60] fix for zero filled metrics (#423)

---
 source/plugins/ruby/podinventory_to_mdm.rb | 98 +++++++++++-----------
 1 file changed, 51 insertions(+), 47 deletions(-)

diff --git a/source/plugins/ruby/podinventory_to_mdm.rb b/source/plugins/ruby/podinventory_to_mdm.rb
index dd5a15990..834515969 100644
--- a/source/plugins/ruby/podinventory_to_mdm.rb
+++ b/source/plugins/ruby/podinventory_to_mdm.rb
@@ -93,67 +93,71 @@ def initialize(custom_metrics_azure_regions)
   end
 
   def get_pod_inventory_mdm_records(batch_time)
+    records = []
     begin
-      # generate all possible values of non_phase_dim_values X pod Phases and zero-fill the ones that are not already present
-      @no_phase_dim_values_hash.each { |key, value|
-        @@pod_phase_values.each { |phase|
-          pod_key = [key, phase].join("~~")
-          if !@pod_count_hash.key?(pod_key)
-            @pod_count_hash[pod_key] = 0
-          else
+      if @process_incoming_stream
+        # generate all possible values of non_phase_dim_values X pod Phases and zero-fill the ones that are not already present
+        @no_phase_dim_values_hash.each { |key, value|
+          @@pod_phase_values.each { |phase|
+            pod_key = [key, phase].join("~~")
+            if !@pod_count_hash.key?(pod_key)
+              @pod_count_hash[pod_key] = 0
+            else
+              next
+            end
+          }
+        }
+        @pod_count_hash.each { |key, value|
+          key_elements = key.split("~~")
+          if key_elements.length != 4
             next
           end
-        }
-      }
-      records = []
-      @pod_count_hash.each { |key, value|
-        key_elements = key.split("~~")
-        if key_elements.length != 4
-          next
-        end
 
-        # get dimension values by key
-        podNodeDimValue = key_elements[0]
-        podNamespaceDimValue = key_elements[1]
-        podControllerNameDimValue = key_elements[2]
-        podPhaseDimValue = key_elements[3]
+          # get dimension values by key
+          podNodeDimValue = key_elements[0]
+          podNamespaceDimValue = key_elements[1]
+          podControllerNameDimValue = key_elements[2]
+          podPhaseDimValue = key_elements[3]
 
-        record = @@pod_inventory_custom_metrics_template % {
-          timestamp: batch_time,
-          metricName: @@pod_count_metric_name,
-          phaseDimValue: podPhaseDimValue,
-          namespaceDimValue: podNamespaceDimValue,
-          nodeDimValue: podNodeDimValue,
-          controllerNameDimValue: podControllerNameDimValue,
-          podCountMetricValue: value,
+          record = @@pod_inventory_custom_metrics_template % {
+            timestamp: batch_time,
+            metricName: @@pod_count_metric_name,
+            phaseDimValue: podPhaseDimValue,
+            namespaceDimValue: podNamespaceDimValue,
+            nodeDimValue: podNodeDimValue,
+            controllerNameDimValue: podControllerNameDimValue,
+            podCountMetricValue: value,
+          }
+          records.push(JSON.parse(record))
         }
-        records.push(JSON.parse(record))
-      }
 
-      #Add pod metric records
-      records = MdmMetricsGenerator.appendAllPodMetrics(records, batch_time)
+        #Add pod metric records
+        records = MdmMetricsGenerator.appendAllPodMetrics(records, batch_time)
 
-      #Send telemetry for pod metrics
-      timeDifference = (DateTime.now.to_time.to_i - @@metricTelemetryTimeTracker).abs
-      timeDifferenceInMinutes = timeDifference / 60
-      if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
-        MdmMetricsGenerator.flushPodMdmMetricTelemetry
-        @@metricTelemetryTimeTracker = DateTime.now.to_time.to_i
-      end
+        #Send telemetry for pod metrics
+        timeDifference = (DateTime.now.to_time.to_i - @@metricTelemetryTimeTracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
+          MdmMetricsGenerator.flushPodMdmMetricTelemetry
+          @@metricTelemetryTimeTracker = DateTime.now.to_time.to_i
+        end
 
-      # Clearing out all hashes after telemetry is flushed
-      MdmMetricsGenerator.clearPodHashes
+        # Clearing out all hashes after telemetry is flushed
+        MdmMetricsGenerator.clearPodHashes
+      end
     rescue Exception => e
       @log.info "Error processing pod inventory record Exception: #{e.class} Message: #{e.message}"
       ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace)
       return []
     end
-    @log.info "Pod Count To Phase #{@pod_count_by_phase} "
-    @log.info "resetting convertor state "
-    @pod_count_hash = {}
-    @no_phase_dim_values_hash = {}
-    @pod_count_by_phase = {}
-    @pod_uids = {}
+    if @process_incoming_stream
+      @log.info "Pod Count To Phase #{@pod_count_by_phase} "
+      @log.info "resetting convertor state "
+      @pod_count_hash = {}
+      @no_phase_dim_values_hash = {}
+      @pod_count_by_phase = {}
+      @pod_uids = {}
+    end
     return records
   end
 

From 5e0b42909bc63886dbf5433545d921a8237ef1e0 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Fri, 7 Aug 2020 13:26:21 -0700
Subject: [PATCH 05/60] consolidate windows agent image docker files (#422)

* consolidate windows agent image docker files

* revert docker file consolidation

* revert readme updates

* merge back windows dockerfiles

* image tag update
---
 ReleaseNotes.md                            |  6 ++---
 ReleaseProcess.md                          |  2 +-
 charts/azuremonitor-containers/values.yaml |  4 +--
 kubernetes/linux/Dockerfile                |  2 +-
 kubernetes/omsagent.yaml                   |  6 ++---
 kubernetes/windows/Dockerfile              | 31 ++++++++++++++++++++--
 kubernetes/windows/baseimage/Dockerfile    | 28 -------------------
 7 files changed, 39 insertions(+), 40 deletions(-)
 delete mode 100644 kubernetes/windows/baseimage/Dockerfile

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 933900b89..0f1d932a8 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -11,9 +11,9 @@ additional questions or comments.
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
 
-### 08/05/2020 -
-##### Version microsoft/oms:ciprod08052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08052020 (linux)
-##### Version microsoft/oms:win-ciprod08052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08052020 (windows)
+### 08/07/2020 -
+##### Version microsoft/oms:ciprod08072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08072020 (linux)
+##### Version microsoft/oms:win-ciprod08072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08072020 (windows)
 ##### Code change log
 - Collection of KubeState metrics for deployments and HPA
 - Add the Proxy support for Windows agent
diff --git a/ReleaseProcess.md b/ReleaseProcess.md
index 5ec42d496..19802e22c 100644
--- a/ReleaseProcess.md
+++ b/ReleaseProcess.md
@@ -15,7 +15,7 @@ Here are the high-level instructions to get the CIPROD`<MM><DD><YYYY>` image for
 4. If everything validated in DEV, make merge PR from ci_dev and ci_prod and merge once this reviewed by dev team
 6. Update following pipeline variables under ReleaseCandiate with version of chart and image tag
     - CIHELMCHARTVERSION <VersionValue> # For example, 2.7.4
-    - CIImageTagSuffix <ImageTag> # ciprod08052020 or ciprod08052020-1 etc.
+    - CIImageTagSuffix <ImageTag> # ciprod08072020 or ciprod08072020-1 etc.
 7. Merge ci_dev and ci_prod branch which will trigger automatic deployment of latest bits to CIPROD cluster with CIPROD`<MM><DD><YYYY>` image to test and scale cluters, AKS, AKS-Engine
    > Note: production image automatically pushed to CIPROD Public cloud ACR which will inturn replicated to Public cloud MCR.
 8. Validate all the scenarios against clusters in build subscription and scale clusters
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 927d24b35..610e109ef 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -7,8 +7,8 @@
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod08052020"
-    tagWindows: "win-ciprod08052020"
+    tag: "ciprod08072020"
+    tagWindows: "win-ciprod08072020"
     pullPolicy: IfNotPresent
     dockerProviderVersion: "10.0.0-4"
     agentVersion: "1.10.0.1"
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index c82532471..bc27a5384 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod08052020
+ARG IMAGE_TAG=ciprod08072020
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index ac712722a..29533e678 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -343,7 +343,7 @@ spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08052020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08072020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -486,7 +486,7 @@ spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08052020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08072020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -637,7 +637,7 @@ spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08052020"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08072020"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index 0b81b9c71..a18404772 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -1,9 +1,36 @@
-FROM mcr.microsoft.com/azuremonitor/containerinsights/ciprod:winakslogbase-07022020
+FROM mcr.microsoft.com/windows/servercore:ltsc2019
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod08052020
+ARG IMAGE_TAG=win-ciprod08072020
+
+# Do not split this into multiple RUN!
+# Docker creates a layer for every RUN-Statement
+RUN powershell -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
+# Fluentd depends on cool.io whose fat gem is only available for Ruby < 2.5, so need to specify --platform ruby when install Ruby > 2.5 and install msys2 to get dev tools
+RUN choco install -y ruby --version 2.6.5.1 --params "'/InstallDir:C:\ruby26'" \
+&& choco install -y msys2 --version 20190524.0.0.20191030 --params "'/NoPath /NoUpdate /InstallDir:C:\ruby26\msys64'" \
+&& choco install -y vim
+
+# gangams - optional MSYS2 update via ridk failing in merged docker file so skipping that since we dont need optional update
+RUN refreshenv \
+&& ridk install 3 \
+&& echo gem: --no-document >> C:\ProgramData\gemrc \
+&& gem install cool.io -v 1.5.4 --platform ruby \
+&& gem install oj -v 3.3.10 \
+&& gem install json -v 2.2.0 \
+&& gem install fluentd -v 1.10.2 \
+&& gem install win32-service -v 1.0.1 \
+&& gem install win32-ipc -v 0.7.0 \
+&& gem install win32-event -v 0.6.3 \
+&& gem install windows-pr -v 1.2.6 \
+&& gem install tomlrb -v 1.3.0 \
+&& gem install gyoku -v 1.3.1 \
+&& gem sources --clear-all
+
+# Remove gem cache and chocolatey
+RUN powershell -Command "Remove-Item -Force C:\ruby26\lib\ruby\gems\2.6.0\cache\*.gem; Remove-Item -Recurse -Force 'C:\ProgramData\chocolatey'"
 
 SHELL ["powershell"]
 
diff --git a/kubernetes/windows/baseimage/Dockerfile b/kubernetes/windows/baseimage/Dockerfile
deleted file mode 100644
index 122daa9cc..000000000
--- a/kubernetes/windows/baseimage/Dockerfile
+++ /dev/null
@@ -1,28 +0,0 @@
-FROM mcr.microsoft.com/windows/servercore:ltsc2019
-
-# Do not split this into multiple RUN!
-# Docker creates a layer for every RUN-Statement
-RUN powershell -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
-
-# Fluentd depends on cool.io whose fat gem is only available for Ruby < 2.5, so need to specify --platform ruby when install Ruby > 2.5 and install msys2 to get dev tools
-RUN choco install -y ruby --version 2.6.5.1 --params "'/InstallDir:C:\ruby26'" \
-&& choco install -y msys2 --version 20190524.0.0.20191030 --params "'/NoPath /NoUpdate /InstallDir:C:\ruby26\msys64'" \
-&& choco install -y vim
-RUN refreshenv \
-&& ridk install 2 3 \
-&& echo gem: --no-document >> C:\ProgramData\gemrc \
-&& gem install cool.io -v 1.5.4 --platform ruby \
-&& gem install oj -v 3.3.10 \
-&& gem install json -v 2.2.0 \
-&& gem install fluentd -v 1.10.2 \
-&& gem install win32-service -v 1.0.1 \
-&& gem install win32-ipc -v 0.7.0 \
-&& gem install win32-event -v 0.6.3 \
-&& gem install windows-pr -v 1.2.6 \
-&& gem install tomlrb -v 1.3.0 \
-&& gem install gyoku -v 1.3.1 \
-&& gem sources --clear-all
-
-# Remove gem cache and chocolatey
-RUN powershell -Command "Remove-Item -Force C:\ruby26\lib\ruby\gems\2.6.0\cache\*.gem; Remove-Item -Recurse -Force 'C:\ProgramData\chocolatey'"
-

From c5c28f0dc4f89893aea4215c6fd5647b904c4c92 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Thu, 13 Aug 2020 11:00:19 -0700
Subject: [PATCH 06/60] Gangams/cluster creation scripts (#414)

* onprem k8s script

* script updates

* scripts for creating non-aks clusters

* fix minor text update

* updates

* script updates

* fix

* script updates

* fix scripts to install docker
---
 scripts/cluster-creation/README.md          |  45 +++++
 scripts/cluster-creation/aks-engine.sh      | 163 +++++++++++++++++
 scripts/cluster-creation/arc-k8s-cluster.sh | 190 ++++++++++++++++++++
 scripts/cluster-creation/aro-v4.sh          | 146 +++++++++++++++
 scripts/cluster-creation/onprem-k8s.sh      | 106 +++++++++++
 5 files changed, 650 insertions(+)
 create mode 100644 scripts/cluster-creation/README.md
 create mode 100644 scripts/cluster-creation/aks-engine.sh
 create mode 100644 scripts/cluster-creation/arc-k8s-cluster.sh
 create mode 100644 scripts/cluster-creation/aro-v4.sh
 create mode 100755 scripts/cluster-creation/onprem-k8s.sh

diff --git a/scripts/cluster-creation/README.md b/scripts/cluster-creation/README.md
new file mode 100644
index 000000000..57d0c5dbf
--- /dev/null
+++ b/scripts/cluster-creation/README.md
@@ -0,0 +1,45 @@
+# Instructions to create k8s clusters
+
+## On-Prem K8s Cluster
+
+An on-prem k8s cluster can be created on any VM or physical machine using [kind](https://kind.sigs.k8s.io/).
+
+```
+bash onprem-k8s.sh --cluster-name <name-of-the-cluster>
+```
+
+## AKS-Engine cluster
+
+aks-engine creates an unmanaged Kubernetes cluster in Azure. Use the commands below to create such a cluster.
+
+```
+
+# Either you can reuse existing service principal or create one with below instructions
+subscriptionId="<subscription id>"
+az account set -s ${subscriptionId}
+sp=$(az ad sp create-for-rbac --role="Contributor" --scopes="/subscriptions/${subscriptionId}")
+# get the appId (i.e. clientid) and password (i.e. clientSecret)
+echo $sp
+
+clientId=$(echo $sp | jq -r '.appId')
+clientSecret=$(echo $sp | jq -r '.password')
+
+# create the aks-engine cluster (jq -r above strips the JSON quotes so the values can be passed as-is)
+bash aks-engine.sh --subscription-id "${subscriptionId}" --client-id "${clientId}" --client-secret "${clientSecret}" --dns-prefix "<clusterDnsPrefix>" --location "<location>"
+```
+
+## ARO v4 Cluster
+
+Azure Redhat Openshift v4 cluster can be created with below command.
+
+> Note: Because of the cleanup policy on internal subscriptions, cluster creation can fail if you don't set the cleanup service to none on the subnets of the ARO vnet before the cluster is created.
+```
+bash aro-v4.sh --subscription-id "<subscriptionId>" --resource-group "<rgName>" --cluster-name "<clusterName>" --location "<location>"
+```
+## Azure Arc K8s cluster
+
+You can connect an on-prem k8s cluster, or an unmanaged k8s cluster such as aks-engine, to Azure through Azure Arc.
+
+```
+bash arc-k8s-cluster.sh --subscription-id "<subId>" --resource-group "<rgName>" --cluster-name "<clusterName>" --location "<location>" --kube-context "<contextofexistingcluster>"
+```
diff --git a/scripts/cluster-creation/aks-engine.sh b/scripts/cluster-creation/aks-engine.sh
new file mode 100644
index 000000000..9d287ea07
--- /dev/null
+++ b/scripts/cluster-creation/aks-engine.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+set -e
+TEMP_DIR=temp-$RANDOM
+DEFAULT_ONPREM_K8S_CLUSTER="aks-engine-k8s-test"
+AKS_ENGINE_VERSION="v0.54.0"
+
+download-aks-engine()
+{   # Download the aks-engine release pinned by AKS_ENGINE_VERSION and install its binary into /usr/local/bin.
+    sudo curl -fLO https://github.com/Azure/aks-engine/releases/download/${AKS_ENGINE_VERSION}/aks-engine-${AKS_ENGINE_VERSION}-linux-amd64.tar.gz  # -f: fail on HTTP errors instead of saving the error page
+    sudo tar -xvf aks-engine-${AKS_ENGINE_VERSION}-linux-amd64.tar.gz
+    sudo mv aks-engine-${AKS_ENGINE_VERSION}-linux-amd64 aks-engine
+    sudo mv -f aks-engine/aks-engine /usr/local/bin
+}
+
+
+usage()
+{   # Print the supported command line; there is no positional sub-command, only options.
+    local basename=`basename $0`
+    echo
+    echo "create aks-engine cluster:"
+    echo "$basename --subscription-id <subscriptionId> --client-id <clientId> --client-secret <clientSecret> --dns-prefix <dns-prefix> --location <location>"
+}
+
+parse_args()
+{
+# Parse the CLI options into the globals subscriptionId, clientId, clientSecret, dnsPrefix and location.
+ if [ $# -le 1 ]
+  then
+    usage
+    exit 1
+ fi
+
+# Transform long options to short ones
+for arg in "$@"; do
+  shift
+  case "$arg" in
+    "--subscription-id")  set -- "$@" "-s" ;;
+    "--client-id") set -- "$@" "-c" ;;
+    "--client-secret") set -- "$@" "-w" ;;
+    "--dns-prefix") set -- "$@" "-d" ;;
+    "--location") set -- "$@" "-l" ;;
+    "--"*)   usage; exit 1 ;;  # unknown long option: stop instead of silently dropping it
+    *)        set -- "$@" "$arg"
+  esac
+done
+
+local OPTIND opt
+
+while getopts 'hs:c:w:d:l:' opt; do
+    case "$opt" in
+      h)
+      usage
+        ;;
+
+      s)
+        subscriptionId="$OPTARG"
+        echo "subscriptionId is $OPTARG"
+        ;;
+
+      c)
+        clientId="$OPTARG"
+        echo "clientId is $OPTARG"
+        ;;
+
+      w)
+        clientSecret="$OPTARG"
+        echo "clientSecret is set"  # never print the secret value itself to the console/log
+        ;;
+
+      d)
+        dnsPrefix="$OPTARG"
+        echo "dnsPrefix is $OPTARG"
+        ;;
+
+      l)
+        location="$OPTARG"
+        echo "location is $OPTARG"
+        ;;
+
+      ?)
+        usage
+        exit 1
+        ;;
+    esac
+  done
+  shift "$(($OPTIND -1))"
+
+
+}
+create_cluster()
+{
+
+sudo touch kubernetes.json
+sudo chmod 777 kubernetes.json
+# For docker runtime, remove kubernetesConfig block
+cat >> kubernetes.json <<EOL
+{
+  "apiVersion": "vlabs",
+  "properties": {
+    "orchestratorProfile": {
+      "orchestratorType": "Kubernetes",
+	   "orchestratorRelease": "1.16",
+      "kubernetesConfig": {
+       "containerRuntime": "containerd"
+       }
+    },
+    "masterProfile": {
+      "count": 1,
+      "dnsPrefix": "",
+      "vmSize": "Standard_D2_v3"
+    },
+    "agentPoolProfiles": [
+      {
+        "name": "agentpool1",
+        "count": 2,
+        "vmSize": "Standard_D2_v3"
+      }
+    ],
+    "linuxProfile": {
+      "adminUsername": "azureuser",
+      "ssh": {
+        "publicKeys": [
+          {
+            "keyData": ""
+          }
+        ]
+      }
+    },
+    "servicePrincipalProfile": {
+      "clientId": "",
+      "secret": ""
+    }
+  }
+}
+EOL
+
+echo "deploying aks-engine cluster ..."
+sudo aks-engine deploy --subscription-id ${subscriptionId} --client-id ${clientId} --client-secret ${clientSecret} --dns-prefix ${dnsPrefix} --location ${location} --api-model  kubernetes.json
+echo "deploying of aks-engine cluster completed."
+
+}
+
+
+
+echo "creating aks-engine k8s cluster ..."
+echo "AKS-ENGINE version: ${AKS_ENGINE_VERSION}"
+cd ~
+echo "creating temp directory":$TEMP_DIR
+sudo mkdir $TEMP_DIR && cd $TEMP_DIR
+
+echo "validate args"
+parse_args "$@"  # quoted so arguments containing spaces survive intact
+
+echo "download aks-engine"
+download-aks-engine
+
+echo "creating cluster: ${dnsPrefix}"  # the cluster is identified by its dns prefix; no cluster-name option exists
+create_cluster
+echo "creating aks-engine cluster completed."
+
+echo "changing file permissions to access the kubeconfig"
+sudo chmod -R 777  ~/${TEMP_DIR}/_output
+echo "kubeconfig of this cluster should be under ~/${TEMP_DIR}/_output/${dnsPrefix}/kubeconfig"
diff --git a/scripts/cluster-creation/arc-k8s-cluster.sh b/scripts/cluster-creation/arc-k8s-cluster.sh
new file mode 100644
index 000000000..ee625a8b8
--- /dev/null
+++ b/scripts/cluster-creation/arc-k8s-cluster.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+set -e
+TEMP_DIR=temp-$RANDOM
+DefaultCloud="AzureCloud"
+HELM_VERSION="v3.2.1"
+
+install-helm()
+{
+  sudo curl -LO  https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz
+  sudo tar -zxvf helm-${HELM_VERSION}-linux-amd64.tar.gz
+  sudo rm -rf /usr/local/bin/helm
+  sudo mv linux-amd64/helm /usr/local/bin/helm
+}
+
+download-and-install-azure-cli()
+{
+  # https://docs.microsoft.com/en-us/cli/azure/install-azure-cli-apt?view=azure-cli-latest#install-with-one-command
+  sudo curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+}
+
+install-and-update-k8s-extensions()
+{
+  echo "install connectedk8s extension"
+  az extension add --name connectedk8s
+
+  echo "install k8sconfiguration  extension"
+  az extension add --name k8sconfiguration
+
+  echo "update connectedk8s  extension"
+  az extension update --name connectedk8s
+
+  echo "update k8sconfiguration  extension"
+  az extension update --name k8sconfiguration
+}
+
+install_arc_k8s_prerequisites()
+{
+   echo "register Microsoft.Kubernetes provider"
+   az provider register --namespace Microsoft.Kubernetes --wait
+
+   echo "register Microsoft.KubernetesConfiguration provider"
+   az provider register --namespace Microsoft.KubernetesConfiguration --wait
+
+   k8sRegistrationState=$(az provider show -n Microsoft.Kubernetes --query registrationState -o tsv)
+   k8sRegistrationState=$(echo $k8sRegistrationState | tr "[:upper:]" "[:lower:]")
+   echo "Microsoft.Kubernetes registration state: ${k8sRegistrationState}"
+   if [ "$k8sRegistrationState" != "registered" ]; then
+      echo "registartion requires around 5 to 10 mins so waiting for 5 mins"
+      sleep 5m
+   fi
+
+   k8sConfigState=$(az provider show -n Microsoft.KubernetesConfiguration --query registrationState -o tsv)
+   k8sConfigState=$(echo $k8sConfigState | tr "[:upper:]" "[:lower:]")
+   echo "Microsoft.KubernetesConfiguration registration state: ${k8sConfigState}"
+   if [ "$k8sConfigState" != "registered" ]; then
+      echo "registartion requires around 5 to 10 mins so waiting for 5 mins"
+      sleep 5m
+   fi
+}
+
+
+usage()
+{
+    local basename=`basename $0`
+    echo
+    echo "connect k8s cluster to azure arc:"
+    echo "$basename --subscription-id <subscriptionId> --resource-group <rgName> --cluster-name <clusterName> --location <location> --kube-context <kubecontextofthek8scluster>"
+}
+
+parse_args()
+{
+
+ if [ $# -le 1 ]
+  then
+    usage
+    exit 1
+ fi
+
+# Transform long options to short ones
+for arg in "$@"; do
+  shift
+  case "$arg" in
+    "--subscription-id")  set -- "$@" "-s" ;;
+    "--resource-group") set -- "$@" "-r" ;;
+    "--cluster-name") set -- "$@" "-c" ;;
+    "--location") set -- "$@" "-l" ;;
+    "--kube-context") set -- "$@" "-k" ;;
+    "--"*)   usage ;;
+    *)        set -- "$@" "$arg"
+  esac
+done
+
+local OPTIND opt
+
+while getopts 'hs:r:c:l:k:' opt; do
+    case "$opt" in
+      h)
+      usage
+        ;;
+
+      s)
+        subscriptionId="$OPTARG"
+        echo "subscriptionId is $OPTARG"
+        ;;
+
+      r)
+        resourceGroupName="$OPTARG"
+        echo "resourceGroupName is $OPTARG"
+        ;;
+
+      c)
+        clusterName="$OPTARG"
+        echo "clusterName is $OPTARG"
+        ;;
+
+      l)
+        location="$OPTARG"
+        echo "location is $OPTARG"
+        ;;
+
+      k)
+        kubecontext="$OPTARG"
+        echo "kubecontext is $OPTARG"
+        ;;
+
+      ?)
+        usage
+        exit 1
+        ;;
+    esac
+  done
+  shift "$(($OPTIND -1))"
+
+
+}
+
+connect_azure_arc_k8s()
+{
+  # Ensure the resource group exists, then connect the cluster selected by --kube-context (if given) to Azure Arc.
+  echo "create resource group: ${resourceGroupName} if it doenst exist"
+  isrgExists=$(az group exists -g ${resourceGroupName})
+  if [ "$isrgExists" = "true" ]; then  # compare the CLI output instead of executing it as a command
+     echo "resource group: ${resourceGroupName} already exists"
+  else
+      echo "creating resource group ${resourceGroupName} in region since it doesnt exist"
+      az group create -l ${location} -n ${resourceGroupName}
+  fi
+
+  echo "connecting k8s cluster with kube-context : ${kubecontext} to azure with clustername: ${clusterName} and resourcegroup: ${resourceGroupName}  ..."
+  az connectedk8s connect --name ${clusterName} --resource-group ${resourceGroupName} ${kubecontext:+--kube-context "${kubecontext}"}  # only pass the flag when a context was supplied
+  echo "connecting k8s cluster with kube-context : ${kubecontext} to azure with clustername: ${clusterName} and resourcegroup: ${resourceGroupName} completed."
+}
+
+
+
+echo "connecting k8s cluster to azure arc..."
+echo "HELM version: ${HELM_VERSION}"
+cd ~
+echo "creating temp directory":$TEMP_DIR
+sudo mkdir $TEMP_DIR && cd $TEMP_DIR
+
+echo "validate args"
+parse_args "$@"
+
+# Install the tooling first: every az command below requires the azure cli to be present.
+echo "installing helm client ..."
+install-helm
+echo "installing helm client completed."
+
+echo "installing azure cli ..."
+download-and-install-azure-cli
+echo "installing azure cli completed."
+
+echo "set the ${DefaultCloud} for azure cli"
+az cloud set -n $DefaultCloud
+echo "login to azure cli"
+az login --use-device-code
+echo "set the subscription ${subscriptionId} for cli"
+az account set -s $subscriptionId
+
+echo "installing arc k8s extensions and pre-requisistes ..."
+install-and-update-k8s-extensions
+install_arc_k8s_prerequisites
+echo "installing arc k8s extensions and pre-requisites completed."
+
+echo "connecting cluster to azure arc k8s via azure arc "
+connect_azure_arc_k8s
+echo "connecting cluster to azure arc k8s via azure arc completed."
+
+echo "connecting k8s cluster to azure arc completed."
diff --git a/scripts/cluster-creation/aro-v4.sh b/scripts/cluster-creation/aro-v4.sh
new file mode 100644
index 000000000..8540ae931
--- /dev/null
+++ b/scripts/cluster-creation/aro-v4.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+set -e
+TEMP_DIR=temp-$RANDOM
+DefaultCloud="AzureCloud"
+DefaultVnetName="aro-net"
+DefaultMasterSubnetName="master-subnet"
+DefaultWorkerSubnetName="worker-subnet"
+
+download-and-install-azure-cli()
+{
+  # https://docs.microsoft.com/en-us/cli/azure/install-azure-cli-apt?view=azure-cli-latest#install-with-one-command
+  sudo curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+}
+
+register_aro_v4_provider()
+{
+   echo "register Microsoft.RedHatOpenShift provider"
+   az provider register -n Microsoft.RedHatOpenShift --wait
+}
+
+usage()
+{
+    local basename=`basename $0`
+    echo
+    echo "create aro v4 cluster:"
+    echo "$basename --subscription-id <subscriptionId> --resource-group <rgName> --cluster-name <clusterName> --location <location>"
+}
+
+parse_args()
+{
+
+ if [ $# -le 1 ]
+  then
+    usage
+    exit 1
+ fi
+
+# Transform long options to short ones
+for arg in "$@"; do
+  shift
+  case "$arg" in
+    "--subscription-id")  set -- "$@" "-s" ;;
+    "--resource-group") set -- "$@" "-r" ;;
+    "--cluster-name") set -- "$@" "-c" ;;
+    "--location") set -- "$@" "-l" ;;
+    "--"*)   usage ;;
+    *)        set -- "$@" "$arg"
+  esac
+done
+
+local OPTIND opt
+
+while getopts 'hs:r:c:l:' opt; do
+    case "$opt" in
+      h)
+      usage
+        ;;
+
+      s)
+        subscriptionId="$OPTARG"
+        echo "subscriptionId is $OPTARG"
+        ;;
+
+      r)
+        resourceGroupName="$OPTARG"
+        echo "resourceGroupName is $OPTARG"
+        ;;
+
+      c)
+        clusterName="$OPTARG"
+        echo "clusterName is $OPTARG"
+        ;;
+
+      l)
+        location="$OPTARG"
+        echo "location is $OPTARG"
+        ;;
+
+      ?)
+        usage
+        exit 1
+        ;;
+    esac
+  done
+  shift "$(($OPTIND -1))"
+}
+
+create_aro_v4_cluster()
+{
+
+  echo "create resource group: ${resourceGroupName} if it doenst exist"
+  isrgExists=$(az group exists -g ${resourceGroupName})
+  if $isrgExists; then
+     echo "resource group: ${resourceGroupName} already exists"
+  else
+      echo "creating resource group ${resourceGroupName} in region since it doesnt exist"
+      az group create -l ${location} -n ${resourceGroupName}
+  fi
+
+  echo "creating virtual network"
+  az network vnet create --resource-group ${resourceGroupName} --name ${DefaultVnetName} --address-prefixes 10.0.0.0/22
+
+  echo "adding empty subnet for master nodes"
+  az network vnet subnet create --resource-group ${resourceGroupName} --vnet-name ${DefaultVnetName} --name ${DefaultMasterSubnetName} --address-prefixes 10.0.0.0/23 --service-endpoints Microsoft.ContainerRegistry
+
+  echo "adding empty subnet for worker nodes"
+  az network vnet subnet create --resource-group ${resourceGroupName}  --vnet-name ${DefaultVnetName} --name ${DefaultWorkerSubnetName} --address-prefixes 10.0.2.0/23 --service-endpoints Microsoft.ContainerRegistry
+
+  echo "Please make sure disable to diable cleanup service on subnet nsgs of aor vnet for internal subscriptions"
+  sleep 1m
+
+  echo "Disable subnet private endpoint policies on the master subnet"
+  az network vnet subnet update --name ${DefaultMasterSubnetName} --resource-group ${resourceGroupName} --vnet-name ${DefaultVnetName} --disable-private-link-service-network-policies true
+
+  echo "creating ARO v4 cluster"
+  az aro create  --resource-group ${resourceGroupName} --name ${clusterName} --vnet ${DefaultVnetName}  --master-subnet ${DefaultMasterSubnetName} --worker-subnet ${DefaultWorkerSubnetName}
+
+}
+
+
+echo "creating aro v4 cluster in specified azure subscription and resource group..."
+cd ~
+echo "creating temp directory":$TEMP_DIR
+sudo mkdir $TEMP_DIR && cd $TEMP_DIR
+
+echo "validate args"
+parse_args "$@"
+
+echo "installing azure cli ..."  # must happen before the first az command below
+download-and-install-azure-cli
+echo "installing azure cli completed."
+
+echo "set the ${DefaultCloud} for azure cli"
+az cloud set -n $DefaultCloud
+
+echo "login to azure cli"
+az login --use-device-code
+echo "set the subscription ${subscriptionId} for cli"
+az account set -s $subscriptionId
+
+echo "creating aro v4 cluster ..."
+register_aro_v4_provider
+create_aro_v4_cluster
+echo "creating aro v4 cluster completed."
+
+echo "creating aro v4 cluster in specified azure subscription and resource completed."
diff --git a/scripts/cluster-creation/onprem-k8s.sh b/scripts/cluster-creation/onprem-k8s.sh
new file mode 100755
index 000000000..147681133
--- /dev/null
+++ b/scripts/cluster-creation/onprem-k8s.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+set -e
+TEMP_DIR=temp-$RANDOM
+KIND_VERSION="v0.8.1"
+
+install-kind()
+{
+sudo curl -Lo ./kind https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-linux-amd64
+sudo chmod +x ./kind
+sudo mv ./kind /usr/local/bin/kind
+}
+
+download_install_docker()
+{
+ echo "download docker script"
+ sudo curl -L https://get.docker.com/ -o get-docker.sh
+ echo "installing docker script"
+ sudo sh get-docker.sh
+
+ echo "add user to docker group"
+ sudo usermod -aG docker $USER
+
+}
+
+create_cluster()
+{
+sudo touch kind-config.yaml
+sudo chmod 777 kind-config.yaml
+cat >> kind-config.yaml <<EOL
+kind: Cluster
+apiVersion: kind.sigs.k8s.io/v1alpha3
+nodes:
+ - role: control-plane
+ - role: worker
+EOL
+sudo kind create cluster --config kind-config.yaml  --name $clusterName
+}
+
+usage()
+{
+    local basename=`basename $0`
+    echo
+    echo "create kind k8 cluster:"
+    echo "$basename --cluster-name <clusterName> "
+}
+
+parse_args()
+{
+
+ if [ $# -le 1 ]
+  then
+    usage
+    exit 1
+ fi
+
+# Transform long options to short ones
+for arg in "$@"; do
+  shift
+  case "$arg" in
+    "--cluster-name") set -- "$@" "-c" ;;
+    "--"*)   usage ;;
+    *)        set -- "$@" "$arg"
+  esac
+done
+
+local OPTIND opt
+
+while getopts 'hc:' opt; do
+    case "$opt" in
+      h)
+      usage
+        ;;
+
+      c)
+        clusterName="$OPTARG"
+        echo "clusterName is $OPTARG"
+        ;;
+
+      ?)
+        usage
+        exit 1
+        ;;
+    esac
+  done
+  shift "$(($OPTIND -1))"
+}
+
+echo "creating kind k8 cluster ..."
+echo "KIND version: ${KIND_VERSION}"
+cd ~
+echo "creating temp directory":$TEMP_DIR
+sudo mkdir $TEMP_DIR && cd $TEMP_DIR
+
+echo "parsing args"
+parse_args $@
+
+echo "download and install docker"
+download_install_docker
+
+echo "download and install kind"
+install-kind
+
+echo "creating cluster: ${clusterName}"
+create_cluster
+
+echo "creating kind k8 cluster completed."

From d7a3750107e6c8778f13dccb8d20767348a68292 Mon Sep 17 00:00:00 2001
From: bragi92 <kadubey@microsoft.com>
Date: Fri, 14 Aug 2020 13:00:30 -0700
Subject: [PATCH 07/60] fix: Pin to a particular version of ltsc2019 by SHA
 (#427)

---
 kubernetes/windows/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index a18404772..c8162b539 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/windows/servercore:ltsc2019
+FROM mcr.microsoft.com/windows/servercore@sha256:921bed01c2a023310bdbaa288edebd82c4910e536ff206b87e9cbe703ca27505
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"

From 5e8de91534c59a9bff4d786f2085195dca67392d Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Fri, 14 Aug 2020 14:17:53 -0700
Subject: [PATCH 08/60] enable collecting npm metrics (optionally) (#425)

* enable collecting npm metrics (optionally)

* fix default enrichment value

* fix adx
---
 build/linux/installer/conf/telegraf-rs.conf   |  42 +++++++
 build/linux/installer/conf/telegraf.conf      |  41 +++++++
 .../installer/datafiles/base_container.data   |   1 +
 .../scripts/tomlparser-npm-config.rb          | 113 ++++++++++++++++++
 kubernetes/container-azm-ms-agentconfig.yaml  |   4 +
 kubernetes/linux/main.sh                      |  11 +-
 .../plugins/ruby/CAdvisorMetricsAPIClient.rb  |  10 +-
 7 files changed, 220 insertions(+), 2 deletions(-)
 create mode 100644 build/linux/installer/scripts/tomlparser-npm-config.rb

diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf
index f1e9cc282..3f2f65cff 100644
--- a/build/linux/installer/conf/telegraf-rs.conf
+++ b/build/linux/installer/conf/telegraf-rs.conf
@@ -611,3 +611,45 @@ $AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER
 #     Computer = "placeholder_hostname"
 #     ControllerType = "$CONTROLLER_TYPE"
 
+##npm
+[[inputs.prometheus]]
+  #name_prefix="container.azm.ms/"
+  ## An array of urls to scrape metrics from.
+  urls = ["$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER"]
+  fielddrop = ["$AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER"]
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## An array of Kubernetes services to scrape metrics from.
+  # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
+
+  ## Kubernetes config file to create client from.
+  # kube_config = "/path/to/kubernetes.config"
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
+  # monitor_kubernetes_pods = true
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+  ## OR
+  # bearer_token_string = "abc_123"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  #tls_cert = /path/to/certfile
+  # tls_key = /path/to/keyfile
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
+  #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
+  #[inputs.prometheus.tagpass]
+  #  operation_type = ["create_container", "remove_container", "pull_image"]
+
diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf
index b554dd4b3..19b6058be 100644
--- a/build/linux/installer/conf/telegraf.conf
+++ b/build/linux/installer/conf/telegraf.conf
@@ -703,6 +703,47 @@
   insecure_skip_verify = true
   #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
 
+##npm
+[[inputs.prometheus]]
+  #name_prefix="container.azm.ms/"
+  ## An array of urls to scrape metrics from.
+  urls = ["$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE"]
+  
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+  ## An array of Kubernetes services to scrape metrics from.
+  # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]
+
+  ## Kubernetes config file to create client from.
+  # kube_config = "/path/to/kubernetes.config"
+
+  ## Scrape Kubernetes pods for the following prometheus annotations:
+  ## - prometheus.io/scrape: Enable scraping for this pod
+  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+  ##     set this to `https` & most likely set the tls config.
+  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+  ## - prometheus.io/port: If port is not 9102 use this annotation
+  # monitor_kubernetes_pods = true
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+  ## OR
+  # bearer_token_string = "abc_123"
+
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  #tls_cert = /path/to/certfile
+  # tls_key = /path/to/keyfile
+  ## Use TLS but skip chain & host verification
+  insecure_skip_verify = true
+  #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"]
+  #[inputs.prometheus.tagpass]
+  #  operation_type = ["create_container", "remove_container", "pull_image"]
+
 # [[inputs.exec]]
 #   ## Commands array
 #   interval = "15m"
diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index f07e71b2d..fc5a6c8bc 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -125,6 +125,7 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/tomlparser.rb;                                             build/common/installer/scripts/tomlparser.rb;     755; root; root
 /opt/td-agent-bit-conf-customizer.rb;                           build/common/installer/scripts/td-agent-bit-conf-customizer.rb;     755; root; root
 /opt/ConfigParseErrorLogger.rb;                                 build/common/installer/scripts/ConfigParseErrorLogger.rb;           755; root; root
+/opt/tomlparser-npm-config.rb;                                  build/linux/installer/scripts/tomlparser-npm-config.rb;     755; root; root
 
 
 /opt/microsoft/omsagent/plugin/filter_cadvisor_health_container.rb;                                 source/plugins/ruby/filter_cadvisor_health_container.rb; 644; root; root
diff --git a/build/linux/installer/scripts/tomlparser-npm-config.rb b/build/linux/installer/scripts/tomlparser-npm-config.rb
new file mode 100644
index 000000000..c5953836b
--- /dev/null
+++ b/build/linux/installer/scripts/tomlparser-npm-config.rb
@@ -0,0 +1,113 @@
+#!/usr/local/bin/ruby
+
+#this should be require relative in Linux and require in windows, since it is a gem install on windows
+@os_type = ENV["OS_TYPE"]
+if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+  require "tomlrb"
+else
+  require_relative "tomlrb"
+end
+
+require_relative "ConfigParseErrorLogger"
+
+@configMapMountPath = "/etc/config/settings/integrations"
+@configSchemaVersion = ""
+@collect_basic_npm_metrics = false
+@collect_advanced_npm_metrics = false
+@npm_node_url="http://$NODE_IP:10091/node-metrics"
+@npm_cluster_url="http://npm-metrics-cluster-service.kube-system:9000/cluster-metrics"
+@npm_basic_drop_metrics_cluster = "npm_ipset_counts"
+
+# Use parser to parse the configmap toml file to a ruby structure
+def parseConfigMap
+  begin
+    # Check to see if config map is created
+    if (File.file?(@configMapMountPath))
+      puts "config::configmap container-azm-ms-agentconfig for npm metrics found, parsing values"
+      parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
+      puts "config::Successfully parsed mounted config map for npm metrics"
+      return parsedConfig
+    else
+      puts "config::configmap container-azm-ms-agentconfig for npm metrics not mounted, using defaults"
+      return nil
+    end
+  rescue => errorStr
+    ConfigParseErrorLogger.logError("Exception while parsing config map for npm metrics: #{errorStr}, using defaults, please check config map for errors")
+    return nil
+  end
+end
+
+# Use the ruby structure created after config parsing to set the right values to be used as environment variables
+def populateSettingValuesFromConfigMap(parsedConfig)
+  begin
+    if !parsedConfig.nil? && !parsedConfig[:integrations].nil? && !parsedConfig[:integrations][:azure_network_policy_manager].nil? && !parsedConfig[:integrations][:azure_network_policy_manager][:collect_advanced_metrics].nil?
+        advanced_npm_metrics = parsedConfig[:integrations][:azure_network_policy_manager][:collect_advanced_metrics].to_s
+        puts "got:integrations.azure_network_policy_manager.collect_advanced_metrics='#{advanced_npm_metrics}'"
+        if !advanced_npm_metrics.nil? && advanced_npm_metrics.strip.casecmp("true") == 0
+            @collect_advanced_npm_metrics = true
+        else
+            @collect_advanced_npm_metrics = false
+        end
+        puts "set:integrations.azure_network_policy_manager.collect_advanced_metrics=#{@collect_advanced_npm_metrics}"
+    end
+  rescue => errorStr
+    puts "config::error:Exception while reading config settings for npm advanced setting - #{errorStr}, using defaults"
+    @collect_advanced_npm_metrics = false
+  end
+  begin
+    if !parsedConfig.nil? && !parsedConfig[:integrations].nil? && !parsedConfig[:integrations][:azure_network_policy_manager].nil? && !parsedConfig[:integrations][:azure_network_policy_manager][:collect_basic_metrics].nil?
+        basic_npm_metrics = parsedConfig[:integrations][:azure_network_policy_manager][:collect_basic_metrics].to_s
+        puts "got:integrations.azure_network_policy_manager.collect_basic_metrics='#{basic_npm_metrics}'"
+        if !basic_npm_metrics.nil? && basic_npm_metrics.strip.casecmp("true") == 0
+            @collect_basic_npm_metrics = true
+        else
+            @collect_basic_npm_metrics = false
+        end
+        puts "set:integrations.azure_network_policy_manager.collect_basic_metrics=#{@collect_basic_npm_metrics}"
+    end
+  rescue => errorStr
+    puts "config::error:Exception while reading config settings for npm basic setting - #{errorStr}, using defaults"
+    @collect_basic_npm_metrics = false
+  end
+end
+
+@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+puts "****************Start Config Processing********************"
+if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
+  configMapSettings = parseConfigMap
+  if !configMapSettings.nil?
+    populateSettingValuesFromConfigMap(configMapSettings)
+  end
+else
+  if (File.file?(@configMapMountPath))
+    ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
+  end
+  @collect_basic_npm_metrics = false
+  @collect_advanced_npm_metrics = false
+end
+
+# Write the settings to file, so that they can be set as environment variables
+file = File.open("integration_npm_config_env_var", "w")
+
+if !file.nil?
+  if @collect_advanced_npm_metrics == true
+    file.write("export TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED=1\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=#{@npm_node_url}\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=#{@npm_cluster_url}\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=\n")
+  elsif @collect_basic_npm_metrics == true
+    file.write("export TELEMETRY_NPM_INTEGRATION_METRICS_BASIC=1\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=#{@npm_node_url}\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=#{@npm_cluster_url}\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=#{@npm_basic_drop_metrics_cluster}\n")
+  else
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=\n")
+    file.write("export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=\n")
+  end
+  # Close file after writing all environment variables
+  file.close
+else
+  puts "Exception while opening file for writing config environment variables"
+end
+puts "****************End Config Processing********************" # always close the banner opened by "Start Config Processing"
\ No newline at end of file
diff --git a/kubernetes/container-azm-ms-agentconfig.yaml b/kubernetes/container-azm-ms-agentconfig.yaml
index f3f442608..58e09f041 100644
--- a/kubernetes/container-azm-ms-agentconfig.yaml
+++ b/kubernetes/container-azm-ms-agentconfig.yaml
@@ -100,6 +100,10 @@ data:
         container_memory_rss_threshold_percentage = 95.0
         # Threshold for container memoryWorkingSet, metric will be sent only when memory working set exceeds or becomes equal to the following percentage
         container_memory_working_set_threshold_percentage = 95.0
+  integrations: |-
+    [integrations.azure_network_policy_manager]
+        collect_basic_metrics = false
+        collect_advanced_metrics = false
 metadata:
   name: container-azm-ms-agentconfig
   namespace: kube-system
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index 92f4977d6..311470660 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -160,7 +160,7 @@ done
 source config_env_var
 
 
-#Parse the configmap to set the right environment variables.
+#Parse the configmap to set the right environment variables for health feature.
 /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-health-config.rb
 
 cat health_config_env_var | while read line; do
@@ -169,6 +169,15 @@ cat health_config_env_var | while read line; do
 done
 source health_config_env_var
 
+#Parse the configmap to set the right environment variables for network policy manager (npm) integration.
+/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-npm-config.rb
+
+cat integration_npm_config_env_var | while IFS= read -r line; do
+    # quoted (and read with -r) so each line is appended verbatim: no word splitting, glob expansion or backslash stripping
+    echo "$line" >> ~/.bashrc
+done
+source integration_npm_config_env_var
+
 #Replace the placeholders in td-agent-bit.conf file for fluentbit with custom/default values in daemonset
 if [ ! -e "/etc/config/kube.conf" ]; then
       /opt/microsoft/omsagent/ruby/bin/ruby td-agent-bit-conf-customizer.rb
diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
index 42ecfcaf0..13796cd1e 100644
--- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
+++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
@@ -33,6 +33,8 @@ class CAdvisorMetricsAPIClient
   @cAdvisorMetricsSecurePort = ENV["IS_SECURE_CADVISOR_PORT"]
   @containerLogsRoute = ENV["AZMON_CONTAINER_LOGS_ROUTE"]
   @hmEnabled = ENV["AZMON_CLUSTER_ENABLE_HEALTH_MODEL"]
+  @npmIntegrationBasic = ENV["TELEMETRY_NPM_INTEGRATION_METRICS_BASIC"]
+  @npmIntegrationAdvanced = ENV["TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED"]
 
   @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt"
   @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M
@@ -250,7 +252,13 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
                      #telemetry about health model
                      if (!@hmEnabled.nil? && !@hmEnabled.empty?)
                       telemetryProps["hmEnabled"] = @hmEnabled
-                    end
+                     end
+                     #telemetry for npm integration
+                     if (!@npmIntegrationAdvanced.nil? && !@npmIntegrationAdvanced.empty?)
+                       telemetryProps["int-npm-a"] = "1"
+                     elsif (!@npmIntegrationBasic.nil? && !@npmIntegrationBasic.empty?)
+                       telemetryProps["int-npm-b"] = "1"
+                     end
                     ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps)
                   end
                 end

From 17e7ff8bf65c6fd3ab2dc2b47043249055e2dc3d Mon Sep 17 00:00:00 2001
From: saaror <31900410+saaror@users.noreply.github.com>
Date: Mon, 17 Aug 2020 00:56:26 -0700
Subject: [PATCH 09/60] Saaror patch 3 (#426)

* Create README.MD

Creating content for Kubecon lab

* Update README.MD

* Update README.MD
---
 Kubecon/README.MD | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 Kubecon/README.MD

diff --git a/Kubecon/README.MD b/Kubecon/README.MD
new file mode 100644
index 000000000..873cfaf9a
--- /dev/null
+++ b/Kubecon/README.MD
@@ -0,0 +1,36 @@
+# Kubecon Azure Monitor for containers lab
+
+## Overview
+
+### This Azure Monitor for containers lab will give you hands-on experience monitoring AKS workloads. In this lab you will be working with Azure Monitor, Log Analytics and Azure Monitor for Container Insights.
+
+## Instructions for lab
+
+1. Set-up environment [Setup Guide](https://github.com/rkuehfus/pre-ready-2019-H1/blob/master/Student/Guides/Deployment%20Setup%20Guide.docx?raw=true)
+
+2. Tasks for the lab
+  *  From your Visual Studio Server, deploy the eShoponWeb application to AKS using Dev Spaces
+  *  From Azure Monitor, locate the container running the eShoponWeb application
+  *  Generate an exception in the eShoponWeb application (Hint: Try to change your password)
+  *  Optimize the Azure Monitor for containers ingestion cost by fine tuning log-collection parameters like std-out/std-error, namespace.
+
+## Outcome
+
+### Understand Azure Monitor capabilities, facilitate an Azure Monitor customer conversation, and demo key features of Azure Monitor.
+
+## Target Audience
+
+This content is targeted at DevOps engineers and SREs who want to build their knowledge of Azure Monitor. Anyone with a passion for monitoring is also more than welcome to attend.
+
+## Prerequisites
+ 1.	Please review the following content before the event
+  a.  [Azure Monitor for containers Overview](https://docs.microsoft.com/azure/azure-monitor/insights/container-insights-overview)
+  b.  [Optimize Azure Monitor for containers cost ](https://medium.com/microsoftazure/azure-monitor-for-containers-optimizing-data-collection-settings-for-cost-ce6f848aca32)
+
+2.	Attendees have access to an Azure Subscription where they can each deploy the provided ARM template that will build a very detailed infrastructure to monitor.  This includes the Vnet, subnets, NSG(s), LB(s), NAT rules, scale set and a fully functional .NET Core Application (eShopOnWeb) to monitor.
+3.	Attendees should have a level 200-300 understanding of the Azure platform.  Understand concepts like PowerShell, Azure Cli, ARM, resource groups, RBAC, network, storage, compute, scale sets, virtual machines and security.  Previous experience working with ARM templates is recommended.
+4.	Access to a machine with Visual Studio Code and the Azure PowerShell Modules loaded or Azure CLI. VS Code ARM and PowerShell extensions should be configured.
+
+![alt text](https://raw.githubusercontent.com/rkuehfus/pre-ready-2019-H1/master/monitoringhackdiagram.png)
+
+

From 6c7c6757b8c8cc87eaa89516393788d3d942857b Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Tue, 18 Aug 2020 11:53:59 -0700
Subject: [PATCH 10/60] Gangams/add containerd support to windows agent (#428)

* wip

* wip

* wip

* wip

* bug fix related to uri

* wip

* wip

* fix bug with ignore cert validation

* logic to ignore cert validation

* minor

* fix minor debug log issue

* improve log message

* debug message

* fix bug with nullorempty check

* remove debug statements

* refactor parsers

* add debug message

* clean up

* chart updates

* fix formatting issues
---
 .../installer/conf/fluent-cri-parser.conf     |   6 +
 .../installer/conf/fluent-docker-parser.conf  |   5 +
 build/windows/installer/conf/fluent.conf      |  32 ++-
 .../templates/omsagent-daemonset-windows.yaml |   7 +
 kubernetes/omsagent.yaml                      |   4 +
 kubernetes/windows/Dockerfile                 |   3 +
 kubernetes/windows/main.ps1                   | 199 ++++++++++++++----
 7 files changed, 198 insertions(+), 58 deletions(-)
 create mode 100644 build/windows/installer/conf/fluent-cri-parser.conf
 create mode 100644 build/windows/installer/conf/fluent-docker-parser.conf

diff --git a/build/windows/installer/conf/fluent-cri-parser.conf b/build/windows/installer/conf/fluent-cri-parser.conf
new file mode 100644
index 000000000..86f1572ca
--- /dev/null
+++ b/build/windows/installer/conf/fluent-cri-parser.conf
@@ -0,0 +1,6 @@
+<parse>
+  @type  regexp
+  expression ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<log>.*)$
+  time_format %Y-%m-%dT%H:%M:%S.%NZ
+  keep_time_key true
+</parse>
diff --git a/build/windows/installer/conf/fluent-docker-parser.conf b/build/windows/installer/conf/fluent-docker-parser.conf
new file mode 100644
index 000000000..9dc800aeb
--- /dev/null
+++ b/build/windows/installer/conf/fluent-docker-parser.conf
@@ -0,0 +1,5 @@
+<parse>
+  @type  json
+  time_format %Y-%m-%dT%H:%M:%S.%NZ
+  keep_time_key true
+</parse>
diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf
index a4cacbcf6..c96300b1e 100644
--- a/build/windows/installer/conf/fluent.conf
+++ b/build/windows/installer/conf/fluent.conf
@@ -12,11 +12,8 @@
   @log_level trace
   path_key tailed_path
   limit_recently_modified 5m
-  <parse>
-    @type  json
-    time_format %Y-%m-%dT%H:%M:%S.%NZ
-    keep_time_key true
-  </parse>
+  # if the container runtime is non docker then this will be updated to fluent-cri-parser.conf during container startup
+  @include fluent-docker-parser.conf
 </source>
 
 <source>
@@ -27,11 +24,8 @@
   @log_level trace
   path_key tailed_path
   read_from_head true
-  <parse>
-    @type  json
-    time_format %Y-%m-%dT%H:%M:%S.%NZ
-    keep_time_key true
-  </parse>
+  # if the container runtime is non docker then this will be updated to fluent-cri-parser.conf during container startup
+  @include fluent-docker-parser.conf
 </source>
 
 <filter  oms.container.**>
@@ -59,13 +53,13 @@
   </server>
 
   <buffer>
-        overflow_action throw_exception
-        chunk_limit_size 32k
-        queued_chunks_limit_size 256
-        flush_interval 1
-        flush_thread_interval 0.5
-        flush_thread_burst_interval 0.01
-        flush_thread_count 4
-        retry_forever true
-    </buffer>
+    overflow_action throw_exception
+    chunk_limit_size 32k
+    queued_chunks_limit_size 256
+    flush_interval 1
+    flush_thread_interval 0.5
+    flush_thread_burst_interval 0.01
+    flush_thread_count 4
+    retry_forever true
+  </buffer>
 </match>
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index 0ea7a9af6..b8e667398 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -53,6 +53,13 @@ spec:
        - name: CONTROLLER_TYPE
          value: "DaemonSet"
        - name: HOSTNAME
+         valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+       - name: NODE_IP
+         valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
        volumeMounts:
         - mountPath: C:\ProgramData\docker\containers
           name: docker-windows-containers
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 29533e678..db788a37e 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -660,6 +660,10 @@ spec:
             valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
+          - name: NODE_IP
+            valueFrom:
+               fieldRef:
+                 fieldPath: status.hostIP
          volumeMounts:
           - mountPath: C:\ProgramData\docker\containers
             name: docker-windows-containers
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index c8162b539..06e11e73a 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -56,6 +56,9 @@ COPY ./omsagentwindows/out_oms.so /opt/omsagentwindows/out_oms.so
 
 # copy fluent, fluent-bit and out_oms conf files
 COPY ./omsagentwindows/installer/conf/fluent.conf /etc/fluent/
+# copy fluent docker and cri parser conf files
+COPY ./omsagentwindows/installer/conf/fluent-cri-parser.conf /etc/fluent/
+COPY ./omsagentwindows/installer/conf/fluent-docker-parser.conf /etc/fluent/
 COPY ./omsagentwindows/installer/conf/fluent-bit.conf /etc/fluent-bit
 COPY ./omsagentwindows/installer/conf/out_oms.conf /etc/omsagentwindows
 
diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1
index b7ddfa8e7..de82722ad 100644
--- a/kubernetes/windows/main.ps1
+++ b/kubernetes/windows/main.ps1
@@ -1,34 +1,51 @@
-function Confirm-WindowsServiceExists($name)
-{
-    if (Get-Service $name -ErrorAction SilentlyContinue)
+Add-Type @"
+    using System;
+    using System.Net;
+    using System.Net.Security;
+    using System.Security.Cryptography.X509Certificates;
+    public class ServerCertificateValidationCallback
     {
+        public static void Ignore()
+        {
+            ServicePointManager.ServerCertificateValidationCallback +=
+                delegate
+                (
+                    Object obj,
+                    X509Certificate certificate,
+                    X509Chain chain,
+                    SslPolicyErrors errors
+                )
+                {
+                    return true;
+                };
+        }
+    }
+"@
+function Confirm-WindowsServiceExists($name) {
+    if (Get-Service $name -ErrorAction SilentlyContinue) {
         return $true
     }
     return $false
 }
 
-function Remove-WindowsServiceIfItExists($name)
-{
+function Remove-WindowsServiceIfItExists($name) {
     $exists = Confirm-WindowsServiceExists $name
-    if ($exists)
-    {
+    if ($exists) {
         sc.exe \\server delete $name
     }
 }
 
-function Start-FileSystemWatcher
-{
+function Start-FileSystemWatcher {
     Start-Process powershell -NoNewWindow .\filesystemwatcher.ps1
 }
 
 #register fluentd as a windows service
 
-function Set-EnvironmentVariables
-{
+function Set-EnvironmentVariables {
     $domain = "opinsights.azure.com"
     if (Test-Path /etc/omsagent-secret/DOMAIN) {
         # TODO: Change to omsagent-secret before merging
-        $domain =  Get-Content /etc/omsagent-secret/DOMAIN
+        $domain = Get-Content /etc/omsagent-secret/DOMAIN
     }
 
     # Set DOMAIN
@@ -38,7 +55,7 @@ function Set-EnvironmentVariables
     $wsID = ""
     if (Test-Path /etc/omsagent-secret/WSID) {
         # TODO: Change to omsagent-secret before merging
-        $wsID =  Get-Content /etc/omsagent-secret/WSID
+        $wsID = Get-Content /etc/omsagent-secret/WSID
     }
 
     # Set DOMAIN
@@ -48,7 +65,7 @@ function Set-EnvironmentVariables
     $wsKey = ""
     if (Test-Path /etc/omsagent-secret/KEY) {
         # TODO: Change to omsagent-secret before merging
-        $wsKey =  Get-Content /etc/omsagent-secret/KEY
+        $wsKey = Get-Content /etc/omsagent-secret/KEY
     }
 
     # Set KEY
@@ -58,7 +75,7 @@ function Set-EnvironmentVariables
     $proxy = ""
     if (Test-Path /etc/omsagent-secret/PROXY) {
         # TODO: Change to omsagent-secret before merging
-        $proxy =  Get-Content /etc/omsagent-secret/PROXY
+        $proxy = Get-Content /etc/omsagent-secret/PROXY
         Write-Host "Validating the proxy configuration since proxy configuration provided"
         # valide the proxy endpoint configuration
         if (![string]::IsNullOrEmpty($proxy)) {
@@ -66,26 +83,22 @@ function Set-EnvironmentVariables
             if (![string]::IsNullOrEmpty($proxy)) {
                 $proxy = [string]$proxy.Trim();
                 $parts = $proxy -split "@"
-                if ($parts.Length -ne 2)
-                {
+                if ($parts.Length -ne 2) {
                     Write-Host "Invalid ProxyConfiguration $($proxy). EXITING....."
                     exit 1
                 }
                 $subparts1 = $parts[0] -split "//"
-                if ($subparts1.Length -ne 2)
-                {
+                if ($subparts1.Length -ne 2) {
                     Write-Host "Invalid ProxyConfiguration $($proxy). EXITING....."
                     exit 1
                 }
                 $protocol = $subparts1[0].ToLower().TrimEnd(":")
-                if (!($protocol -eq "http") -and !($protocol -eq "https"))
-                {
+                if (!($protocol -eq "http") -and !($protocol -eq "https")) {
                     Write-Host "Unsupported protocol in ProxyConfiguration $($proxy). EXITING....."
                     exit 1
                 }
                 $subparts2 = $parts[1] -split ":"
-                if ($subparts2.Length -ne 2)
-                {
+                if ($subparts2.Length -ne 2) {
                     Write-Host "Invalid ProxyConfiguration $($proxy). EXITING....."
                     exit 1
                 }
@@ -118,46 +131,154 @@ function Set-EnvironmentVariables
     .\setenv.ps1
 }
 
-function Start-Fluent
-{
+function Get-ContainerRuntime {
+    # Detects the container runtime from the containerID prefix of a running pod returned by the kubelet /pods API.
+    # Returns "docker" (the default, until containerd becomes the default in AKS) whenever detection is not possible.
+    $containerRuntime = "docker"
+    $response = ""
+    $NODE_IP = ""
+    try {
+        if (![string]::IsNullOrEmpty([System.Environment]::GetEnvironmentVariable("NODE_IP", "PROCESS"))) {
+            $NODE_IP = [System.Environment]::GetEnvironmentVariable("NODE_IP", "PROCESS")
+        }
+        elseif (![string]::IsNullOrEmpty([System.Environment]::GetEnvironmentVariable("NODE_IP", "USER"))) {
+            $NODE_IP = [System.Environment]::GetEnvironmentVariable("NODE_IP", "USER")
+        }
+        elseif (![string]::IsNullOrEmpty([System.Environment]::GetEnvironmentVariable("NODE_IP", "MACHINE"))) {
+            $NODE_IP = [System.Environment]::GetEnvironmentVariable("NODE_IP", "MACHINE")
+        }
+
+        if (![string]::IsNullOrEmpty($NODE_IP)) {
+            $isPodsAPISuccess = $false
+            Write-Host "Value of NODE_IP environment variable : $($NODE_IP)"
+            try {
+                Write-Host "Making API call to http://$($NODE_IP):10255/pods"
+                $response = Invoke-WebRequest -uri http://$($NODE_IP):10255/pods  -UseBasicParsing
+                Write-Host "Response status code of API call to http://$($NODE_IP):10255/pods : $($response.StatusCode)"
+            }
+            catch {
+                Write-Host "API call to http://$($NODE_IP):10255/pods failed"
+            }
+            # fall back to https on port 10250 (with the service account token) when the http call did not return 200
+            if (![string]::IsNullOrEmpty($response) -and $response.StatusCode -eq 200) {
+                Write-Host "API call to http://$($NODE_IP):10255/pods succeeded"
+                $isPodsAPISuccess = $true
+            }
+            else {
+                try {
+                    Write-Host "Making API call to https://$($NODE_IP):10250/pods"
+                    # ignore certificate validation since kubelet uses self-signed cert
+                    [ServerCertificateValidationCallback]::Ignore()
+                    $response = Invoke-WebRequest -Uri https://$($NODE_IP):10250/pods  -Headers @{'Authorization' = "Bearer $(Get-Content /var/run/secrets/kubernetes.io/serviceaccount/token)" } -UseBasicParsing
+                    Write-Host "Response status code of API call to https://$($NODE_IP):10250/pods : $($response.StatusCode)"
+                    if (![string]::IsNullOrEmpty($response) -and $response.StatusCode -eq 200) {
+                        Write-Host "API call to https://$($NODE_IP):10250/pods succeeded"
+                        $isPodsAPISuccess = $true
+                    }
+                }
+                catch {
+                    Write-Host "API call to https://$($NODE_IP):10250/pods failed"
+                }
+            }
+            # the text before the first ':' in the containerID of the first usable running pod is taken as the runtime
+            if ($isPodsAPISuccess) {
+                if (![string]::IsNullOrEmpty($response.Content)) {
+                    $podList = $response.Content | ConvertFrom-Json
+                    if (![string]::IsNullOrEmpty($podList)) {
+                        $podItems = $podList.Items
+                        if ($podItems.Length -gt 0) {
+                            Write-Host "found pod items: $($podItems.Length)"
+                            for ($index = 0; $index -lt $podItems.Length ; $index++) { # -lt: valid indexes are 0..Length-1
+                                Write-Host "current podItem index : $($index)"
+                                $pod = $podItems[$index]
+                                if (![string]::IsNullOrEmpty($pod) -and
+                                    ![string]::IsNullOrEmpty($pod.status) -and
+                                    ![string]::IsNullOrEmpty($pod.status.phase) -and
+                                    $pod.status.phase -eq "Running" -and
+                                    $pod.status.ContainerStatuses.Length -gt 0 -and
+                                    ![string]::IsNullOrEmpty($pod.status.ContainerStatuses[0].containerID)) {
+                                    $containerID = $pod.status.ContainerStatuses[0].containerID
+                                    $detectedContainerRuntime = $containerID.split(":")[0].trim()
+                                    Write-Host "detected containerRuntime as : $($detectedContainerRuntime)"
+                                    if (![string]::IsNullOrEmpty($detectedContainerRuntime) -and !$detectedContainerRuntime.StartsWith('docker')) {
+                                        $containerRuntime = $detectedContainerRuntime
+                                    }
+                                    Write-Host "using containerRuntime as : $($containerRuntime)"
+                                    break
+                                }
+                            }
+                        }
+                        else {
+                            Write-Host "got podItems count is 0 hence using default container runtime:  $($containerRuntime)"
+                        }
+                    }
+                    else {
+                        Write-Host "got podList null or empty hence using default container runtime:  $($containerRuntime)"
+                    }
+                }
+                else {
+                    Write-Host "got empty response content for /Pods API call hence using default container runtime:  $($containerRuntime)"
+                }
+            }
+        }
+        else {
+            Write-Host "got empty NODE_IP environment variable"
+        }
+        # set CONTAINER_RUNTIME env for debug and telemetry purpose
+        [System.Environment]::SetEnvironmentVariable("CONTAINER_RUNTIME", $containerRuntime, "Process")
+        [System.Environment]::SetEnvironmentVariable("CONTAINER_RUNTIME", $containerRuntime, "Machine")
+    }
+    catch {
+        $e = $_.Exception
+        Write-Host $e
+        Write-Host "exception occured on getting container runtime hence using default container runtime: $($containerRuntime)"
+    }
+
+    return $containerRuntime
+}
+
+function Start-Fluent {
+
     # Run fluent-bit service first so that we do not miss any logs being forwarded by the fluentd service.
     # Run fluent-bit as a background job. Switch this to a windows service once fluent-bit supports natively running as a windows service
     Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\fluent-bit\bin\fluent-bit.exe" -ArgumentList @("-c", "C:\etc\fluent-bit\fluent-bit.conf", "-e", "C:\opt\omsagentwindows\out_oms.so") }
 
+    $containerRuntime = Get-ContainerRuntime
+
     #register fluentd as a service and start
     # there is a known issues with win32-service https://github.com/chef/win32-service/issues/70
+    if (![string]::IsNullOrEmpty($containerRuntime) -and [string]$containerRuntime.StartsWith('docker') -eq $false) {
+        # change parser from docker to cri if the container runtime is not docker
+        Write-Host "changing parser from Docker to CRI since container runtime : $($containerRuntime) and which is non-docker"
+        (Get-Content -Path C:/etc/fluent/fluent.conf -Raw)  -replace 'fluent-docker-parser.conf','fluent-cri-parser.conf' | Set-Content C:/etc/fluent/fluent.conf
+    }
+
     fluentd --reg-winsvc i --reg-winsvc-auto-start --winsvc-name fluentdwinaks --reg-winsvc-fluentdopt '-c C:/etc/fluent/fluent.conf -o C:/etc/fluent/fluent.log'
 
     Notepad.exe | Out-Null
 }
 
-function Generate-Certificates
-{
+function Generate-Certificates {
     Write-Host "Generating Certificates"
     C:\\opt\\omsagentwindows\\certgenerator\\certificategenerator.exe
 }
 
-function Test-CertificatePath
-{
+function Test-CertificatePath {
     $certLocation = $env:CI_CERT_LOCATION
-    $keyLocation =  $env:CI_KEY_LOCATION
-    if  (!(Test-Path $certLocation))
-    {
+    $keyLocation = $env:CI_KEY_LOCATION
+    if (!(Test-Path $certLocation)) {
         Write-Host "Certificate file not found at $($certLocation). EXITING....."
         exit 1
     }
-    else
-    {
+    else {
         Write-Host "Certificate file found at $($certLocation)"
     }
 
-    if (! (Test-Path $keyLocation))
-    {
+    if (! (Test-Path $keyLocation)) {
         Write-Host "Key file not found at $($keyLocation). EXITING...."
         exit 1
     }
-    else
-    {
+    else {
         Write-Host "Key file found at $($keyLocation)"
     }
 }
@@ -172,7 +293,7 @@ Test-CertificatePath
 Start-Fluent
 
 # List all powershell processes running. This should have main.ps1 and filesystemwatcher.ps1
-Get-WmiObject Win32_process | Where-Object {$_.Name -match 'powershell'} | Format-Table -Property Name, CommandLine, ProcessId
+Get-WmiObject Win32_process | Where-Object { $_.Name -match 'powershell' } | Format-Table -Property Name, CommandLine, ProcessId
 
 #check if fluentd service is running
 Get-Service fluentdwinaks

From bac8a32aa72b50a2e1ac1844404d7dbdb9ed4d04 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 19 Aug 2020 19:16:31 -0700
Subject: [PATCH 11/60] Gangams/arc k8s metrics  (#413)

* cluster identity token

* wip

* fix exception

* fix exceptions

* fix exception

* fix bug

* fix bug

* minor update

* refactor the code

* more refactoring

* fix bug

* typo fix

* fix typo

* wait for 1min after token renewal request

* add proxy support for arc k8s mdm endpoint

* avoid additional get call

* minor line ending fix

* wip

* have separate log for arc k8s cluster identity

* fix bug on creating crd resource

* remove update permission since not required

* fixed some bugs

* fix pr feedback

* remove list since its not required
---
 README.md                                     |   6 +-
 build/linux/Makefile                          |   2 +-
 .../installer/datafiles/base_container.data   |   6 +-
 .../templates/omsagent-arc-k8s-crd.yaml       |   9 +
 .../templates/omsagent-rbac.yaml              |   8 +
 .../build-and-publish-docker-image.sh         |   2 +-
 .../build-and-publish-docker-image.ps1        |   2 +-
 source/plugins/ruby/KubernetesApiClient.rb    |  17 +-
 .../plugins/ruby/arc_k8s_cluster_identity.rb  | 216 ++++++++++++++++++
 source/plugins/ruby/out_mdm.rb                |  61 +++--
 10 files changed, 307 insertions(+), 22 deletions(-)
 create mode 100644 charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml
 create mode 100644 source/plugins/ruby/arc_k8s_cluster_identity.rb

diff --git a/README.md b/README.md
index 659fe0161..d5d874c9c 100644
--- a/README.md
+++ b/README.md
@@ -200,11 +200,15 @@ docker build -t <repo>/<imagename>:<imagetag> --build-arg IMAGE_TAG=<imagetag> .
 docker push <repo>/<imagename>:<imagetag>
 ```
 
-### Build Cert generator, Out OMS Plugun and Docker Image and Publish Docker Image
+### Build Cert generator, Out OMS Plugin and Docker Image and Publish Docker Image
 
 If you have code cloned on to windows, you can built everything for windows agent on windows machine via below instructions
 
 ```
+# install pre-requisites if you haven't installed them already
+cd %userprofile%\Docker-Provider\kubernetes\windows # based on your repo path
+.\install-build-pre-requisites.ps1
+
 cd %userprofile%\Docker-Provider\kubernetes\windows\dockerbuild # based on your repo path
 docker login # if you want to publish the image to acr then login to acr via `docker login <acr-name>`
 powershell -ExecutionPolicy bypass  # switch to powershell if you are not on powershell already
diff --git a/build/linux/Makefile b/build/linux/Makefile
index 0a20ed205..3f35e1204 100644
--- a/build/linux/Makefile
+++ b/build/linux/Makefile
@@ -118,7 +118,7 @@ distclean : clean
 PROVIDER_STATUS:
 	@echo "========================= Performing Building provider"
 	@echo "clean up everything under: $(INTERMEDIATE_BASE_DIR) to avoid picking up old binaries"
-	$(RMDIR) $(INTERMEDIATE_BASE_DIR)
+	sudo $(RMDIR) $(INTERMEDIATE_BASE_DIR)
 
 KIT_STATUS:
 	@echo "========================= Performing Building provider tests"
diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index fc5a6c8bc..87b89b14c 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -50,7 +50,7 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/microsoft/omsagent/plugin/kubernetes_container_inventory.rb;							source/plugins/ruby/kubernetes_container_inventory.rb;		644; root; root
 /opt/microsoft/omsagent/plugin/proxy_utils.rb;						                  	source/plugins/ruby/proxy_utils.rb;		644; root; root
 
-
+/opt/microsoft/omsagent/plugin/arc_k8s_cluster_identity.rb;                                 source/plugins/ruby/arc_k8s_cluster_identity.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/out_mdm.rb;                                                  source/plugins/ruby/out_mdm.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/filter_cadvisor2mdm.rb;                                      source/plugins/ruby/filter_cadvisor2mdm.rb; 644; root; root
 /opt/microsoft/omsagent/plugin/filter_telegraf2mdm.rb;                                      source/plugins/ruby/filter_telegraf2mdm.rb; 644; root; root
@@ -276,6 +276,10 @@ touch /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log
 chmod 666 /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log
 chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/fluent_forward_failed.log
 
+touch /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
+chmod 666 /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
+chown omsagent:omiusers /var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log
+
 mv /etc/opt/microsoft/docker-cimprov/container.conf /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
 chown omsagent:omsagent /etc/opt/microsoft/omsagent/sysconf/omsagent.d/container.conf
 
diff --git a/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml b/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml
new file mode 100644
index 000000000..f7873de40
--- /dev/null
+++ b/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml
@@ -0,0 +1,9 @@
+{{- if contains "microsoft.kubernetes/connectedclusters" (.Values.omsagent.env.clusterId | lower) }}
+apiVersion: clusterconfig.azure.com/v1beta1
+kind: AzureClusterIdentityRequest
+metadata:
+  name: container-insights-clusteridentityrequest
+  namespace: azure-arc
+spec:
+  audience: https://monitoring.azure.com/
+{{- end }}
diff --git a/charts/azuremonitor-containers/templates/omsagent-rbac.yaml b/charts/azuremonitor-containers/templates/omsagent-rbac.yaml
index 9903f41ff..4f7408e7c 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rbac.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rbac.yaml
@@ -27,8 +27,16 @@ rules:
 - apiGroups: ["azmon.container.insights"]
   resources: ["healthstates"]
   verbs: ["get", "create", "patch"]
+- apiGroups: ["clusterconfig.azure.com"]
+  resources: ["azureclusteridentityrequests"]
+  resourceNames: ["container-insights-clusteridentityrequest"]
+  verbs: ["get", "create", "patch"]
 - nonResourceURLs: ["/metrics"]
   verbs: ["get"]
+- apiGroups: [""]
+  resources: ["secrets"]
+  resourceNames: ["container-insights-clusteridentityrequest-token"]
+  verbs: ["get"]
 ---
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1beta1
diff --git a/kubernetes/linux/dockerbuild/build-and-publish-docker-image.sh b/kubernetes/linux/dockerbuild/build-and-publish-docker-image.sh
index 982c8c491..267f15f32 100644
--- a/kubernetes/linux/dockerbuild/build-and-publish-docker-image.sh
+++ b/kubernetes/linux/dockerbuild/build-and-publish-docker-image.sh
@@ -127,7 +127,7 @@ baseDir=$(dirname $kubernetsDir)
 buildDir=$baseDir/build/linux
 dockerFileDir=$baseDir/kubernetes/linux
 
-echo "sour code base directory: $baseDir"
+echo "source code base directory: $baseDir"
 echo "build directory for docker provider: $buildDir"
 echo "docker file directory: $dockerFileDir"
 
diff --git a/kubernetes/windows/dockerbuild/build-and-publish-docker-image.ps1 b/kubernetes/windows/dockerbuild/build-and-publish-docker-image.ps1
index 27be90d48..dbcfa6097 100644
--- a/kubernetes/windows/dockerbuild/build-and-publish-docker-image.ps1
+++ b/kubernetes/windows/dockerbuild/build-and-publish-docker-image.ps1
@@ -35,7 +35,7 @@ $imagerepo = $imageparts[0]
 if ($imagetag.StartsWith("win-") -eq $false)
 {
     Write-Host "adding win- prefix image tag since its not provided"
-    $imagetag = "win"-$imagetag
+    $imagetag = "win-$imagetag"
 }
 
 Write-Host "image tag used is :$imagetag"
diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb
index 987d290aa..36dcdd8c6 100644
--- a/source/plugins/ruby/KubernetesApiClient.rb
+++ b/source/plugins/ruby/KubernetesApiClient.rb
@@ -99,7 +99,6 @@ def getResourceUri(resource, api_group)
           elsif api_group == @@ApiGroupHPA
             return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}/apis/" + @@ApiGroupHPA + "/" + @@ApiVersionHPA + "/" + resource
           end
-          
         else
           @Log.warn ("Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri")
           return nil
@@ -743,7 +742,7 @@ def getResourcesAndContinuationToken(uri, api_group: nil)
       resourceInventory = nil
       begin
         @Log.info "KubernetesApiClient::getResourcesAndContinuationToken : Getting resources from Kube API using url: #{uri} @ #{Time.now.utc.iso8601}"
-        resourceInfo = getKubeResourceInfo(uri, api_group:api_group)
+        resourceInfo = getKubeResourceInfo(uri, api_group: api_group)
         @Log.info "KubernetesApiClient::getResourcesAndContinuationToken : Done getting resources from Kube API using url: #{uri} @ #{Time.now.utc.iso8601}"
         if !resourceInfo.nil?
           @Log.info "KubernetesApiClient::getResourcesAndContinuationToken:Start:Parsing data for #{uri} using yajl @ #{Time.now.utc.iso8601}"
@@ -761,5 +760,19 @@ def getResourcesAndContinuationToken(uri, api_group: nil)
       end
       return continuationToken, resourceInventory
     end #getResourcesAndContinuationToken
+
+    # Returns the base url (https://host:port) of the Kubernetes API server built from the in-cluster environment variables, or nil after logging a warning.
+    def getKubeAPIServerUrl
+      begin
+        unless ENV["KUBERNETES_SERVICE_HOST"] && ENV["KUBERNETES_PORT_443_TCP_PORT"]
+          @Log.warn "Kubernetes environment variable not set KUBERNETES_SERVICE_HOST: #{ENV["KUBERNETES_SERVICE_HOST"]} KUBERNETES_PORT_443_TCP_PORT: #{ENV["KUBERNETES_PORT_443_TCP_PORT"]}. Unable to form resourceUri"
+          return nil
+        end
+        return "https://#{ENV["KUBERNETES_SERVICE_HOST"]}:#{ENV["KUBERNETES_PORT_443_TCP_PORT"]}"
+      rescue => errorStr
+        @Log.warn "KubernetesApiClient::getKubeAPIServerUrl:Failed  #{errorStr}"
+      end
+      return nil
+    end
   end
 end
diff --git a/source/plugins/ruby/arc_k8s_cluster_identity.rb b/source/plugins/ruby/arc_k8s_cluster_identity.rb
new file mode 100644
index 000000000..ef55c3257
--- /dev/null
+++ b/source/plugins/ruby/arc_k8s_cluster_identity.rb
@@ -0,0 +1,216 @@
+# frozen_string_literal: true
+require "logger"
+require "net/http"
+require "net/https"
+require "uri"
+require "yajl/json_gem"
+require "base64"
+require "time"
+require_relative "KubernetesApiClient"
+require_relative "ApplicationInsightsUtility"
+
+class ArcK8sClusterIdentity
+  # this arc k8s crd version  and arc k8s  uses corresponding version v1beta1 vs v1 based on the k8s version for apiextensions.k8s.io
+  @@cluster_config_crd_api_version = "clusterconfig.azure.com/v1beta1"
+  @@cluster_identity_resource_name = "container-insights-clusteridentityrequest"
+  @@cluster_identity_resource_namespace = "azure-arc"
+  @@cluster_identity_token_secret_namespace = "azure-arc"
+  @@crd_resource_uri_template = "%{kube_api_server_url}/apis/%{cluster_config_crd_api_version}/namespaces/%{cluster_identity_resource_namespace}/azureclusteridentityrequests/%{cluster_identity_resource_name}"
+  @@secret_resource_uri_template = "%{kube_api_server_url}/api/v1/namespaces/%{cluster_identity_token_secret_namespace}/secrets/%{token_secret_name}"
+  @@azure_monitor_custom_metrics_audience = "https://monitoring.azure.com/"
+  @@cluster_identity_request_kind = "AzureClusterIdentityRequest"
+
+  # Sets up the logger, the token cache, the service account file paths, the API server url,
+  # the http client and the service account token used by the other methods of this class.
+  def initialize
+    @LogPath = "/var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log"
+    @log = Logger.new(@LogPath, 1, 5000000)
+    @log.info "initialize start @ #{Time.now.utc.iso8601}"
+    @token_expiry_time = Time.now
+    @cached_access_token = String.new
+    @token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+    @cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+    @kube_api_server_url = KubernetesApiClient.getKubeAPIServerUrl
+    @log.warn "got api server url nil from KubernetesApiClient.getKubeAPIServerUrl @ #{Time.now.utc.iso8601}" if @kube_api_server_url.nil?
+    @http_client = get_http_client
+    @service_account_token = get_service_account_token
+    @log.info "initialize complete @ #{Time.now.utc.iso8601}"
+  end
+
+  # Returns the cluster identity token, refreshing the cached copy when it is empty or within 1 hr of its expiry.
+  def get_cluster_identity_token()
+    begin
+      if @cached_access_token.to_s.empty? || (Time.now + 60 * 60 > @token_expiry_time) # Refresh token 1 hr from expiration
+        # a token we already hold is near expiry: request a renewal, then wait 60 seconds for the renewed token to become available
+        if !@cached_access_token.to_s.empty? && (Time.now + 60 * 60 > @token_expiry_time)
+          @log.info "renewing the token since its near expiry @ #{Time.now.utc.iso8601}"
+          renew_near_expiry_token
+          sleep 60
+        end
+        @log.info "get token reference from crd @ #{Time.now.utc.iso8601}"
+        tokenReference = get_token_reference_from_crd
+        if !tokenReference.nil? && !tokenReference.empty?
+          expiry_time = Time.parse(tokenReference["expirationTime"])
+          token_secret_name = tokenReference["secretName"]
+          token_secret_data_name = tokenReference["dataName"]
+          @log.info "get token from secret @ #{Time.now.utc.iso8601}"
+          token = get_token_from_secret(token_secret_name, token_secret_data_name)
+          if !token.nil?
+            # record the new expiry only together with the new token, so a failed secret read is retried on the next call
+            @cached_access_token = token
+            @token_expiry_time = expiry_time
+          else
+            @log.warn "got token nil from secret: #{token_secret_name}"
+          end
+        else
+          @log.warn "got token reference either nil or empty"
+        end
+      end
+    rescue => err
+      @log.warn "get_cluster_identity_token failed: #{err}"
+      ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
+    end
+    return @cached_access_token
+  end
+
+  private
+
+  # Fetches the secret named token_secret_name from the cluster identity token secret namespace and
+  # returns the base64-decoded value stored under token_secret_data_name; returns nil on a non-200 response or an error.
+  def get_token_from_secret(token_secret_name, token_secret_data_name)
+    begin
+      secret_request_uri = @@secret_resource_uri_template % {
+        kube_api_server_url: @kube_api_server_url,
+        cluster_identity_token_secret_namespace: @@cluster_identity_token_secret_namespace,
+        token_secret_name: token_secret_name,
+      }
+      get_request = Net::HTTP::Get.new(secret_request_uri)
+      get_request["Authorization"] = "Bearer #{@service_account_token}"
+      @log.info "Making GET request to #{secret_request_uri} @ #{Time.now.utc.iso8601}"
+      get_response = @http_client.request(get_request)
+      @log.info "Got response of #{get_response.code} for #{secret_request_uri} @ #{Time.now.utc.iso8601}"
+      # anything other than a 200 means no token could be read
+      return nil if get_response.code.to_i != 200
+      encoded_token = JSON.parse(get_response.body)["data"][token_secret_data_name]
+      return Base64.decode64(encoded_token)
+    rescue => err
+      @log.warn "get_token_from_secret API call failed: #{err}"
+      ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
+    end
+    return nil
+  end
+
+  private
+
+  # Reads expirationTime, secretName and dataName from the status of the cluster identity request crd resource; returns {} unless the GET returns 200 and all three are present.
+  def get_token_reference_from_crd()
+    begin
+      crd_request_uri = @@crd_resource_uri_template % {
+        kube_api_server_url: @kube_api_server_url,
+        cluster_config_crd_api_version: @@cluster_config_crd_api_version,
+        cluster_identity_resource_namespace: @@cluster_identity_resource_namespace,
+        cluster_identity_resource_name: @@cluster_identity_resource_name,
+      }
+      get_request = Net::HTTP::Get.new(crd_request_uri)
+      get_request["Authorization"] = "Bearer #{@service_account_token}"
+      @log.info "Making GET request to #{crd_request_uri} @ #{Time.now.utc.iso8601}"
+      get_response = @http_client.request(get_request)
+      @log.info "Got response of #{get_response.code} for #{crd_request_uri} @ #{Time.now.utc.iso8601}"
+      if get_response.code.to_i == 200
+        status = JSON.parse(get_response.body)["status"] || {}
+        reference = { "expirationTime" => status["expirationTime"], "secretName" => status.dig("tokenReference", "secretName"), "dataName" => status.dig("tokenReference", "dataName") }
+        # hand back all three fields or none, so the caller never acts on a missing or partially populated status
+        return reference unless reference.values.any? { |value| value.to_s.empty? }
+      end
+    rescue => err
+      @log.warn "get_token_reference_from_crd call failed: #{err}"
+      ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
+    end
+    return {}
+  end
+
+  private
+
+  # Requests a renewed cluster identity token by PATCHing the cluster identity request crd resource,
+  # creating the resource with a POST when the PATCH reports 404. StandardError failures are logged, not raised.
+  def renew_near_expiry_token()
+    begin
+      crd_request_uri = @@crd_resource_uri_template % {
+        kube_api_server_url: @kube_api_server_url,
+        cluster_config_crd_api_version: @@cluster_config_crd_api_version,
+        cluster_identity_resource_namespace: @@cluster_identity_resource_namespace,
+        cluster_identity_resource_name: @@cluster_identity_resource_name,
+      }
+      crd_request_body_json = get_crd_request_body.to_json
+      update_request = Net::HTTP::Patch.new(crd_request_uri, { "Content-Type" => "application/merge-patch+json", "Authorization" => "Bearer #{@service_account_token}" })
+      update_request.body = crd_request_body_json
+      update_response = @http_client.request(update_request)
+      @log.info "Got response of #{update_response.code} for PATCH #{crd_request_uri} @ #{Time.now.utc.iso8601}"
+      if update_response.code.to_i == 404
+        @log.info "since crd resource doesnt exist since creating crd resource : #{@@cluster_identity_resource_name} @ #{Time.now.utc.iso8601}"
+        # the kubernetes api creates a resource with a POST to the collection url (no resource name in the path);
+        # the name of the new resource is taken from metadata.name in the request body
+        crd_collection_uri = crd_request_uri.chomp("/#{@@cluster_identity_resource_name}")
+        create_request = Net::HTTP::Post.new(crd_collection_uri, { "Content-Type" => "application/json", "Authorization" => "Bearer #{@service_account_token}" })
+        create_request.body = crd_request_body_json
+        create_response = @http_client.request(create_request)
+        @log.info "Got response of #{create_response.code} for POST #{crd_collection_uri} @ #{Time.now.utc.iso8601}"
+      end
+    rescue => err
+      @log.warn "renew_near_expiry_token call failed: #{err}"
+      ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
+    end
+  end
+
+  private
+
+  # Reads the service account token from @token_file_path and returns it with surrounding whitespace stripped.
+  # Returns nil when the file is missing or unreadable, or when reading it raises.
+  def get_service_account_token()
+    begin
+      if File.exist?(@token_file_path) && File.readable?(@token_file_path)
+        return File.read(@token_file_path).strip
+      end
+      @log.warn "Unable to read token string from #{@token_file_path}"
+    rescue => err
+      @log.warn "get_service_account_token call failed: #{err}"
+      ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
+    end
+    return nil
+  end
+
+  private
+
+  # Builds the Net::HTTP client for the kube api server url with ssl and peer verification enabled.
+  # Returns nil (after logging and sending exception telemetry) when the client cannot be created.
+  def get_http_client()
+    begin
+      api_server_uri = URI.parse(@kube_api_server_url)
+      client = Net::HTTP.new(api_server_uri.host, api_server_uri.port)
+      client.use_ssl = true
+      # the ca cert file is required because the peer certificate is verified against it
+      raise "#{@cert_file_path} doesnt exist" unless File.exist?(@cert_file_path)
+      client.ca_file = @cert_file_path
+      client.verify_mode = OpenSSL::SSL::VERIFY_PEER
+      return client
+    rescue => err
+      @log.warn "Unable to create http client #{err}"
+      ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
+    end
+    return nil
+  end
+
+  private
+
+  # Builds the request body (as a hash) for the cluster identity request crd resource from the
+  # api version, kind, resource name, resource namespace and audience class variables.
+  def get_crd_request_body
+    body = {
+      "apiVersion" => @@cluster_config_crd_api_version,
+      "kind" => @@cluster_identity_request_kind,
+      "metadata" => { "name" => @@cluster_identity_resource_name, "namespace" => @@cluster_identity_resource_namespace },
+      "spec" => { "audience" => @@azure_monitor_custom_metrics_audience },
+    }
+    return body
+  end
+end
diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index d801edb9a..b28c17034 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -16,6 +16,8 @@ def initialize
       require_relative "KubernetesApiClient"
       require_relative "ApplicationInsightsUtility"
       require_relative "constants"
+      require_relative "arc_k8s_cluster_identity"
+      require_relative "proxy_utils"
 
       @@token_resource_url = "https://monitoring.azure.com/"
       @@grant_type = "client_credentials"
@@ -45,6 +47,8 @@ def initialize
       @useMsi = false
       @metrics_flushed_count = 0
 
+      @cluster_identity = nil
+      @isArcK8sCluster = false
       @get_access_token_backoff_expiry = Time.now
     end
 
@@ -76,28 +80,48 @@ def start
         if @can_send_data_to_mdm
           @log.info "MDM Metrics supported in #{aks_region} region"
 
+          if aks_resource_id.downcase.include?("microsoft.kubernetes/connectedclusters")
+            @isArcK8sCluster = true
+          end
           @@post_request_url = @@post_request_url_template % { aks_region: aks_region, aks_resource_id: aks_resource_id }
           @post_request_uri = URI.parse(@@post_request_url)
-          @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port)
+          if (!!@isArcK8sCluster)
+            proxy = (ProxyUtils.getProxyConfiguration)
+            if proxy.nil? || proxy.empty?
+              @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port)
+            else
+              @log.info "Proxy configured on this cluster: #{aks_resource_id}"
+              @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port, proxy[:addr], proxy[:port], proxy[:user], proxy[:pass])
+            end
+          else
+            @http_client = Net::HTTP.new(@post_request_uri.host, @post_request_uri.port)
+          end
           @http_client.use_ssl = true
           @log.info "POST Request url: #{@@post_request_url}"
           ApplicationInsightsUtility.sendCustomEvent("AKSCustomMetricsMDMPluginStart", {})
 
-          # Check to see if SP exists, if it does use SP. Else, use msi
-          sp_client_id = @data_hash["aadClientId"]
-          sp_client_secret = @data_hash["aadClientSecret"]
-
-          if (!sp_client_id.nil? && !sp_client_id.empty? && sp_client_id.downcase != "msi")
-            @useMsi = false
-            aad_token_url = @@aad_token_url_template % { tenant_id: @data_hash["tenantId"] }
-            @parsed_token_uri = URI.parse(aad_token_url)
+          # arc k8s cluster uses cluster identity
+          if (!!@isArcK8sCluster)
+            @log.info "using cluster identity token since cluster is azure arc k8s cluster"
+            @cluster_identity = ArcK8sClusterIdentity.new
+            @cached_access_token = @cluster_identity.get_cluster_identity_token
           else
-            @useMsi = true
-            msi_endpoint = @@msi_endpoint_template % { user_assigned_client_id: @@user_assigned_client_id, resource: @@token_resource_url }
-            @parsed_token_uri = URI.parse(msi_endpoint)
-          end
+            # Check to see if SP exists, if it does use SP. Else, use msi
+            sp_client_id = @data_hash["aadClientId"]
+            sp_client_secret = @data_hash["aadClientSecret"]
+
+            if (!sp_client_id.nil? && !sp_client_id.empty? && sp_client_id.downcase != "msi")
+              @useMsi = false
+              aad_token_url = @@aad_token_url_template % { tenant_id: @data_hash["tenantId"] }
+              @parsed_token_uri = URI.parse(aad_token_url)
+            else
+              @useMsi = true
+              msi_endpoint = @@msi_endpoint_template % { user_assigned_client_id: @@user_assigned_client_id, resource: @@token_resource_url }
+              @parsed_token_uri = URI.parse(msi_endpoint)
+            end
 
-          @cached_access_token = get_access_token
+            @cached_access_token = get_access_token
+          end
         end
       rescue => e
         @log.info "exception when initializing out_mdm #{e}"
@@ -226,7 +250,14 @@ def write(chunk)
 
     def send_to_mdm(post_body)
       begin
-        access_token = get_access_token
+        if (!!@isArcK8sCluster)
+          if @cluster_identity.nil?
+            @cluster_identity = ArcK8sClusterIdentity.new
+          end
+          access_token = @cluster_identity.get_cluster_identity_token
+        else
+          access_token = get_access_token
+        end
         request = Net::HTTP::Post.new(@post_request_uri.request_uri)
         request["Content-Type"] = "application/x-ndjson"
         request["Authorization"] = "Bearer #{access_token}"

From ab03640d2314b1e37a8a248c086b40adf5a2dbe4 Mon Sep 17 00:00:00 2001
From: bragi92 <kadubey@microsoft.com>
Date: Thu, 20 Aug 2020 17:51:57 -0700
Subject: [PATCH 12/60] fix: Reverting back to ltsc2019 tag (#429)

---
 kubernetes/windows/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index 06e11e73a..70a5f6045 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -1,4 +1,4 @@
-FROM mcr.microsoft.com/windows/servercore@sha256:921bed01c2a023310bdbaa288edebd82c4910e536ff206b87e9cbe703ca27505
+FROM mcr.microsoft.com/windows/servercore:ltsc2019
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"

From af0f98176fb85c5cb2366b6927525867c217afeb Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Thu, 27 Aug 2020 16:44:26 -0700
Subject: [PATCH 13/60] more kubelet metrics (#430)

* more kubelet metrics

* clean up new config
---
 build/linux/installer/conf/telegraf.conf | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf
index 19b6058be..28a74a3d0 100644
--- a/build/linux/installer/conf/telegraf.conf
+++ b/build/linux/installer/conf/telegraf.conf
@@ -627,6 +627,7 @@
 #    ACSResourceName = "$TELEMETRY_ACS_RESOURCE_NAME"
 #    Region = "$TELEMETRY_AKS_REGION"
 
+#kubelet-1
 [[inputs.prometheus]]
   name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
@@ -669,6 +670,28 @@
   [inputs.prometheus.tagpass]
     operation_type = ["create_container", "remove_container", "pull_image"]
 
+#kubelet-2
+[[inputs.prometheus]]
+  name_prefix="container.azm.ms/"
+  ## An array of urls to scrape metrics from.
+  urls = ["$CADVISOR_METRICS_URL"]
+  
+  fieldpass = ["kubelet_running_pod_count","volume_manager_total_volumes", "kubelet_node_config_error", "process_resident_memory_bytes", "process_cpu_seconds_total"]
+
+  metric_version = 2
+  url_tag = "scrapeUrl"
+
+
+  ## Use bearer token for authorization. ('bearer_token' takes priority)
+  bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+  ## Specify timeout duration for slower prometheus clients (default is 3s)
+  response_timeout = "15s"
+
+  ## Optional TLS Config
+  tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+  insecure_skip_verify = true
+  
+
 ## prometheus custom metrics
 [[inputs.prometheus]]
 

From 7fc4d4cb03648a081dd9e0fceefc4b742e14021a Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Mon, 31 Aug 2020 18:55:20 -0700
Subject: [PATCH 14/60] fix npm issue when config is empty (#432)

---
 build/linux/installer/conf/telegraf-rs.conf   |  4 +-
 build/linux/installer/conf/telegraf.conf      |  2 +-
 .../scripts/tomlparser-npm-config.rb          | 83 ++++++++++++-------
 3 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/build/linux/installer/conf/telegraf-rs.conf b/build/linux/installer/conf/telegraf-rs.conf
index 3f2f65cff..d81196330 100644
--- a/build/linux/installer/conf/telegraf-rs.conf
+++ b/build/linux/installer/conf/telegraf-rs.conf
@@ -615,8 +615,8 @@ $AZMON_RS_PROM_PLUGINS_WITH_NAMESPACE_FILTER
 [[inputs.prometheus]]
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
-  urls = ["$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER"]
-  fielddrop = ["$AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER"]
+  urls = $AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER
+  fielddrop = $AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER
 
   metric_version = 2
   url_tag = "scrapeUrl"
diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf
index 28a74a3d0..013aa1af2 100644
--- a/build/linux/installer/conf/telegraf.conf
+++ b/build/linux/installer/conf/telegraf.conf
@@ -730,7 +730,7 @@
 [[inputs.prometheus]]
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
-  urls = ["$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE"]
+  urls = $AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE
   
   metric_version = 2
   url_tag = "scrapeUrl"
diff --git a/build/linux/installer/scripts/tomlparser-npm-config.rb b/build/linux/installer/scripts/tomlparser-npm-config.rb
index c5953836b..777fef209 100644
--- a/build/linux/installer/scripts/tomlparser-npm-config.rb
+++ b/build/linux/installer/scripts/tomlparser-npm-config.rb
@@ -14,9 +14,13 @@
 @configSchemaVersion = ""
 @collect_basic_npm_metrics = false
 @collect_advanced_npm_metrics = false
-@npm_node_url="http://$NODE_IP:10091/node-metrics"
-@npm_cluster_url="http://npm-metrics-cluster-service.kube-system:9000/cluster-metrics"
-@npm_basic_drop_metrics_cluster = "npm_ipset_counts"
+@npm_default_setting = "[]"
+@npm_node_urls = "[\"http://$NODE_IP:10091/node-metrics\"]"
+@npm_cluster_urls="[\"http://npm-metrics-cluster-service.kube-system:9000/cluster-metrics\"]"
+@npm_basic_drop_metrics_cluster = "[\"npm_ipset_counts\"]"
+@tgfConfigFileDS = "/etc/opt/microsoft/docker-cimprov/telegraf.conf"
+@tgfConfigFileRS = "/etc/opt/microsoft/docker-cimprov/telegraf-rs.conf"
+@replicaset = "replicaset"
 
 # Use parser to parse the configmap toml file to a ruby structure
 def parseConfigMap
@@ -42,37 +46,37 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   begin
     if !parsedConfig.nil? && !parsedConfig[:integrations].nil? && !parsedConfig[:integrations][:azure_network_policy_manager].nil? && !parsedConfig[:integrations][:azure_network_policy_manager][:collect_advanced_metrics].nil?
         advanced_npm_metrics = parsedConfig[:integrations][:azure_network_policy_manager][:collect_advanced_metrics].to_s
-        puts "got:integrations.azure_network_policy_manager.collect_advanced_metrics='#{advanced_npm_metrics}'"
+        puts "config::npm::got:integrations.azure_network_policy_manager.collect_advanced_metrics='#{advanced_npm_metrics}'"
         if !advanced_npm_metrics.nil? && advanced_npm_metrics.strip.casecmp("true") == 0
             @collect_advanced_npm_metrics = true
         else
             @collect_advanced_npm_metrics = false
         end
-        puts "set:integrations.azure_network_policy_manager.collect_advanced_metrics=#{@collect_advanced_npm_metrics}"
+        puts "config::npm::set:integrations.azure_network_policy_manager.collect_advanced_metrics=#{@collect_advanced_npm_metrics}"
     end
   rescue => errorStr
-    puts "config::error:Exception while reading config settings for npm advanced setting - #{errorStr}, using defaults"
+    puts "config::npm::error:Exception while reading config settings for npm advanced setting - #{errorStr}, using defaults"
     @collect_advanced_npm_metrics = false
   end
   begin
     if !parsedConfig.nil? && !parsedConfig[:integrations].nil? && !parsedConfig[:integrations][:azure_network_policy_manager].nil? && !parsedConfig[:integrations][:azure_network_policy_manager][:collect_basic_metrics].nil?
         basic_npm_metrics = parsedConfig[:integrations][:azure_network_policy_manager][:collect_basic_metrics].to_s
-        puts "got:integrations.azure_network_policy_manager.collect_basic_metrics='#{basic_npm_metrics}'"
+        puts "config::npm::got:integrations.azure_network_policy_manager.collect_basic_metrics='#{basic_npm_metrics}'"
         if !basic_npm_metrics.nil? && basic_npm_metrics.strip.casecmp("true") == 0
             @collect_basic_npm_metrics = true
         else
             @collect_basic_npm_metrics = false
         end
-        puts "set:integrations.azure_network_policy_manager.collect_basic_metrics=#{@collect_basic_npm_metrics}"
+        puts "config::npm::set:integrations.azure_network_policy_manager.collect_basic_metrics=#{@collect_basic_npm_metrics}"
     end
   rescue => errorStr
-    puts "config::error:Exception while reading config settings for npm basic setting - #{errorStr}, using defaults"
+    puts "config::npm::error:Exception while reading config settings for npm basic setting - #{errorStr}, using defaults"
     @collect_basic_npm_metrics = false
   end
 end
 
 @configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
-puts "****************Start Config Processing********************"
+puts "****************Start NPM Config Processing********************"
 if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
   configMapSettings = parseConfigMap
   if !configMapSettings.nil?
@@ -80,34 +84,53 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   end
 else
   if (File.file?(@configMapMountPath))
-    ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
+    ConfigParseErrorLogger.logError("config::npm::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
   end
   @collect_basic_npm_metrics = false
   @collect_advanced_npm_metrics = false
 end
 
-# Write the settings to file, so that they can be set as environment variables
-file = File.open("integration_npm_config_env_var", "w")
 
-if !file.nil?
+
+# pick the telegraf config file to patch: the RS file when CONTROLLER_TYPE is "replicaset" (case-insensitive), the DS file otherwise (including when CONTROLLER_TYPE is unset)
+controller = ENV["CONTROLLER_TYPE"]
+tgfConfigFile = @tgfConfigFileDS
+if !controller.nil? && controller.casecmp(@replicaset) == 0
+  tgfConfigFile = @tgfConfigFileRS
+end
+
+#replace place holders in configuration file
+tgfConfig = File.read(tgfConfigFile) #read returns only after closing the file
+
+if @collect_advanced_npm_metrics == true
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE", @npm_node_urls)
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER", @npm_cluster_urls)
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER", @npm_default_setting)
+elsif @collect_basic_npm_metrics == true
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE", @npm_node_urls)
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER", @npm_cluster_urls)
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER", @npm_basic_drop_metrics_cluster)
+else
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE", @npm_default_setting)
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER", @npm_default_setting)
+  tgfConfig = tgfConfig.gsub("$AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER", @npm_default_setting)
+end
+
+File.open(tgfConfigFile, "w") { |file| file.puts tgfConfig } # 'file' will be closed here after it goes out of scope
+puts "config::npm::Successfully substituted the NPM placeholders into #{tgfConfigFile} file for #{controller}"
+
+# Write the telemetry to file, so that they can be set as environment variables
+telemetryFile = File.open("integration_npm_config_env_var", "w")
+
+if !telemetryFile.nil?
   if @collect_advanced_npm_metrics == true 
-    file.write("export TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED=1\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=#{@npm_node_url}\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=#{@npm_cluster_url}\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=\n")
+    telemetryFile.write("export TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED=1\n")
   elsif @collect_basic_npm_metrics == true
-    file.write("export TELEMETRY_NPM_INTEGRATION_METRICS_BASIC=1\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=#{@npm_node_url}\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=#{@npm_cluster_url}\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=#{@npm_basic_drop_metrics_cluster}\n")
-  else
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE=\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_URL_LIST_CLUSTER=\n")
-    file.write("export AZMON_INTEGRATION_NPM_METRICS_DROP_LIST_CLUSTER=\n")
+    telemetryFile.write("export TELEMETRY_NPM_INTEGRATION_METRICS_BASIC=1\n")
   end
   # Close file after writing all environment variables
-  file.close
+  telemetryFile.close
 else
-  puts "Exception while opening file for writing config environment variables"
-  puts "****************End Config Processing********************"
-end
\ No newline at end of file
+  puts "config::npm::Exception while opening file for writing NPM telemetry environment variables"
+  puts "****************End NPM Config Processing********************"
+end

From 281a77c8c871d6d9a3ad98715098234c1f027302 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Tue, 1 Sep 2020 16:21:04 -0700
Subject: [PATCH 15/60] support multiple docker paths when docker root is
 updated thru knode (#433)

---
 kubernetes/omsagent.yaml | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index db788a37e..947620ebc 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -387,6 +387,13 @@ spec:
               name: host-log
             - mountPath: /var/lib/docker/containers
               name: containerlog-path
+              readOnly: true
+            - mountPath: /mnt/docker
+              name: containerlog-path-2
+              readOnly: true
+            - mountPath: /mnt/containers
+              name: containerlog-path-3
+              readOnly: true
             - mountPath: /etc/kubernetes/host
               name: azure-json-path
             - mountPath: /etc/omsagent-secret
@@ -444,6 +451,12 @@ spec:
         - name: containerlog-path
           hostPath:
             path: /var/lib/docker/containers
+        - name: containerlog-path-2
+          hostPath:
+            path: /mnt/docker
+        - name: containerlog-path-3
+          hostPath:
+            path: /mnt/containers
         - name: azure-json-path
           hostPath:
             path: /etc/kubernetes
@@ -528,8 +541,6 @@ spec:
               name: docker-sock
             - mountPath: /var/log
               name: host-log
-            - mountPath: /var/lib/docker/containers
-              name: containerlog-path
             - mountPath: /etc/kubernetes/host
               name: azure-json-path
             - mountPath: /etc/omsagent-secret
@@ -588,9 +599,6 @@ spec:
         - name: host-log
           hostPath:
             path: /var/log
-        - name: containerlog-path
-          hostPath:
-            path: /var/lib/docker/containers
         - name: azure-json-path
           hostPath:
             path: /etc/kubernetes

From d8d7f9feac3e402b9a004cf2a15e57e5efd445d1 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Thu, 10 Sep 2020 17:22:03 -0700
Subject: [PATCH 16/60] Gangams/doc and other related updates (#434)

* bring back nodeSelector changes for windows agent ds

* readme updates

* chart updates for azure cluster resourceid and region

* set cluster region during onboarding for managed clusters

* wip

* fix for onboarding script

* add sp support for the login

* update help

* add sp support for powershell

* script updates for sp login

* wip

* wip

* wip

* readme updates

* update the links to use ci_prod branch

* fix links

* fix image link

* some more readme updates
---
 README.md                                     | 62 +++++++++-------
 charts/azuremonitor-containers/Chart.yaml     |  2 +-
 .../templates/NOTES.txt                       |  4 +-
 .../templates/omsagent-arc-k8s-crd.yaml       |  2 +-
 .../templates/omsagent-daemonset-windows.yaml | 12 +++-
 .../templates/omsagent-daemonset.yaml         |  9 ++-
 .../templates/omsagent-deployment.yaml        |  9 ++-
 .../templates/omsagent-rs-configmap.yaml      |  2 +-
 .../templates/omsagent-secret.yaml            |  2 +-
 charts/azuremonitor-containers/values.yaml    | 18 ++---
 img/azuremonitor-containers.svg               | 66 +++++++++++++++++
 .../linux/acrworkflows/acrdevnamespace.yaml   |  4 +-
 .../add-monitoring-metrics-publisher-role.md  |  8 +--
 .../aks/mdmonboarding/mdm_onboarding.sh       |  2 +-
 .../mdmonboarding/mdm_onboarding_atscale.sh   |  2 +-
 .../kubernetes/AddMonitoringOnboardingTags.sh |  2 +-
 scripts/onboarding/attach-monitoring-tags.md  |  8 +--
 .../onboarding_azuremonitor_for_containers.sh |  2 +-
 .../onboarding/managed/disable-monitoring.ps1 | 36 +++++++++-
 .../onboarding/managed/disable-monitoring.sh  | 47 ++++++++++--
 .../onboarding/managed/enable-monitoring.ps1  | 38 +++++++++-
 .../onboarding/managed/enable-monitoring.sh   | 71 +++++++++++++++----
 scripts/onboarding/solution-onboarding.md     |  4 +-
 .../preview/health/HealthAgentOnboarding.ps1  |  2 +-
 scripts/troubleshoot/README.md                | 12 ++--
 scripts/troubleshoot/TroubleshootError.ps1    |  2 +-
 .../TroubleshootError_nonAzureK8s.ps1         |  2 +-
 27 files changed, 334 insertions(+), 96 deletions(-)
 create mode 100644 img/azuremonitor-containers.svg

diff --git a/README.md b/README.md
index d5d874c9c..3eec1f344 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,6 @@ The general directory structure is:
 │   ├── windows/                              - scripts to build the Docker image for Windows Agent
 │   │   ├── dockerbuild                       - script to build the code and docker imag, and publish docker image
 │   │   ├── acrworkflows/                     - acr work flows for the Windows Agent container image
-│   │   ├── baseimage/                        - windowsservercore base image for the windows agent container
 │   │   ├── DockerFile                        - DockerFile for Windows Agent Container Image
 │   │   ├── main.ps1                          - Windows Agent container entry point
 │   │   ├── setup.ps1                         - setup file for Windows Agent Container Image
@@ -140,7 +139,7 @@ bash ~/Docker-Provider/scripts/build/linux/install-build-pre-requisites.sh
 
 ### Build Docker Provider Shell Bundle and Docker Image and Publish Docker Image
 
-> Note: If you are using WSL2, ensure Docker for windows running Linux containers mode to build Linux agent image successfully
+> Note: If you are using WSL2, ensure that `Docker for Windows` is running in Linux containers mode on your Windows machine so that the Linux agent image builds successfully
 
 ```
 cd ~/Docker-Provider/kubernetes/linux/dockerbuild
@@ -167,9 +166,23 @@ docker push <repo>/<imagename>:<imagetag>
 ```
 ## Windows Agent
 
+To build the Windows agent, you will have to build the .NET and Go code, and the Docker image for the Windows agent.
+The Docker image for the Windows agent can only be built on a Windows machine running `Docker for Windows` in Windows containers mode, but the .NET code and Go code can be built on Windows, Linux, or WSL2.
+
 ### Install Pre-requisites
 
-If you are planning to build the .net and go code for windows agent on Linux machine and you have already have Docker for Windows on Windows machine, then you may skip this.
+Install the pre-requisites based on the OS platform you will be using to build the Windows agent code
+
+#### Option 1 - Using Windows Machine to Build the Windows agent
+
+```
+powershell # launch powershell with elevated admin on your windows machine
+Set-ExecutionPolicy -ExecutionPolicy bypass # set the execution policy
+cd %userprofile%\Docker-Provider\scripts\build\windows # based on your repo path
+.\install-build-pre-requisites.ps1 #
+```
+
+#### Option 2 - Using WSL2 to Build the Windows agent
 
 ```
 powershell # launch powershell with elevated admin on your windows machine
@@ -178,20 +191,36 @@ net use z: \\wsl$\Ubuntu-16.04 # map the network drive of the ubuntu app to wind
 cd z:\home\sshadmin\Docker-Provider\scripts\build\windows # based on your repo path
 .\install-build-pre-requisites.ps1 #
 ```
-#### Build Certificate Generator Source code and Out OMS Go plugin code
 
-> Note: .net and go code for windows agent can built on Ubuntu
+
+### Build Windows Agent code and Docker Image
+
+> Note: format of the windows agent imagetag will be `win-ci<release><MMDDYYYY>`. possible values for release are test, dev, preview, dogfood, prod etc.
+
+#### Option 1 - Using Windows Machine to Build the Windows agent
+
+Execute the instructions below in an elevated command prompt to build the Windows agent code and Docker image, and to publish the image to ACR or Docker Hub
+
+```
+cd %userprofile%\Docker-Provider\kubernetes\windows\dockerbuild # based on your repo path
+docker login # if you want to publish the image to acr then login to acr via `docker login <acr-name>`
+powershell -ExecutionPolicy bypass  # switch to powershell if you are not on powershell already
+.\build-and-publish-docker-image.ps1 -image <repo>/<imagename>:<imagetag> # trigger build code and image and publish docker hub or acr
+```
+
+#### Option 2 - Using WSL2 to Build the Windows agent
+
+##### On WSL2, Build Certificate Generator Source code and Out OMS Go plugin code
 
 ```
-cd ~/Docker-Provider/build/windows # based on your repo path on ubuntu or WSL2
+cd ~/Docker-Provider/build/windows # based on your repo path on WSL2 Ubuntu app
 pwsh #switch to powershell
 .\Makefile.ps1 # trigger build and publish of .net and go code
 ```
-> Note: format of the imagetag will be `win-ci<release><MMDDYYYY>`. possible values for release are test, dev, preview, dogfood, prod etc.
 
-####  Build and Push Docker Image
+##### On Windows machine, Build and Push Docker Image
 
-> Note: windows container can only built on windows hence you will have to execute below commands on windows via accessing network share or copying published bits omsagentwindows under kubernetes directory on to windows machine
+> Note: The Docker image for the Windows container can only be built on Windows; hence you will have to execute the commands below on Windows, either by accessing the network share or by copying the published bits (`omsagentwindows` under the kubernetes directory) onto the Windows machine
 
 ```
 net use z: \\wsl$\Ubuntu-16.04 # map the network drive of the ubuntu app to windows
@@ -200,21 +229,6 @@ docker build -t <repo>/<imagename>:<imagetag> --build-arg IMAGE_TAG=<imagetag> .
 docker push <repo>/<imagename>:<imagetag>
 ```
 
-### Build Cert generator, Out OMS Plugin and Docker Image and Publish Docker Image
-
-If you have code cloned on to windows, you can built everything for windows agent on windows machine via below instructions
-
-```
-# install pre-requisites if you havent installed already
-cd %userprofile%\Docker-Provider\kubernetes\windows # based on your repo path
-.\install-build-pre-requisites.ps1
-
-cd %userprofile%\Docker-Provider\kubernetes\windows\dockerbuild # based on your repo path
-docker login # if you want to publish the image to acr then login to acr via `docker login <acr-name>`
-powershell -ExecutionPolicy bypass  # switch to powershell if you are not on powershell already
-.\build-and-publish-docker-image.ps1 -image <repo>/<imagename>:<imagetag> # trigger build code and image and publish docker hub or acr
-```
-
 # Azure DevOps Build Pipeline
 
 Navigate to https://github-private.visualstudio.com/microsoft/_build?view=pipelines to see Linux and Windows Agent build pipelines. These pipelines are configured with CI triggers for ci_dev and ci_prod.
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index 202494152..8976b5561 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -28,7 +28,7 @@ keywords:
   - kubernetes
   - kuberneteshealth
 home: https://docs.microsoft.com/en-us/azure/monitoring/monitoring-container-health
-icon: https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/img/azuremonitor-containers.svg
+icon: https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/img/azuremonitor-containers.svg
 sources:
   - https://github.com/microsoft/Docker-Provider/tree/ci_prod
 maintainers:
diff --git a/charts/azuremonitor-containers/templates/NOTES.txt b/charts/azuremonitor-containers/templates/NOTES.txt
index 6179b6f1a..372cecb95 100644
--- a/charts/azuremonitor-containers/templates/NOTES.txt
+++ b/charts/azuremonitor-containers/templates/NOTES.txt
@@ -14,7 +14,7 @@
 
 {{- end }}
 
-{{- if and (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>") }}
+{{- /* Show the cluster-name error only when no identifier was supplied: clusterName, clusterId and Azure.Cluster.ResourceId are all still placeholders. */ -}}{{- if and (eq .Values.omsagent.env.clusterName "<your_cluster_name>") (eq .Values.omsagent.env.clusterId "<your_cluster_id>") (eq .Values.Azure.Cluster.ResourceId "<your_cluster_id>") }}
 
 ##############################################################################
 ####   ERROR: You did not provide cluster name                            ####
@@ -22,7 +22,7 @@
 
 {{- end }}
 
-{{- if or (eq .Values.omsagent.secret.key "<your_workspace_key>") (eq .Values.omsagent.secret.wsid "<your_workspace_id>") (and (eq .Values.omsagent.env.clusterName "<your_cluster_name>") (eq .Values.omsagent.env.clusterId "<your_cluster_id>"))}}
+{{- if or (eq .Values.omsagent.secret.key "<your_workspace_key>") (eq .Values.omsagent.secret.wsid "<your_workspace_id>") (and (eq .Values.omsagent.env.clusterName "<your_cluster_name>") (eq .Values.omsagent.env.clusterId "<your_cluster_id>") (eq .Values.Azure.Cluster.ResourceId "<your_cluster_id>") )}}
 
 This deployment will not complete. To proceed, run
  helm upgrade {{ .Release.Name }} \
diff --git a/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml b/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml
index f7873de40..ebdd5ea3f 100644
--- a/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-arc-k8s-crd.yaml
@@ -1,4 +1,4 @@
-{{- if contains "microsoft.kubernetes/connectedclusters" (.Values.omsagent.env.clusterId | lower) }}
+{{- if or ( contains "microsoft.kubernetes/connectedclusters" (.Values.Azure.Cluster.ResourceId | lower) ) ( contains "microsoft.kubernetes/connectedclusters" (.Values.omsagent.env.clusterId | lower)) }}
 apiVersion:  clusterconfig.azure.com/v1beta1
 kind: AzureClusterIdentityRequest
 metadata:
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index b8e667398..7acd46c37 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -25,6 +25,8 @@ spec:
     dockerProviderVersion: {{ .Values.omsagent.image.dockerProviderVersion }}
     schema-versions: "v1"
   spec:
+   nodeSelector:
+      beta.kubernetes.io/os: windows
    {{- if .Values.omsagent.rbac }}
    serviceAccountName: omsagent
    {{- end }}
@@ -46,6 +48,13 @@ spec:
        - name: AKS_REGION
          value: {{ .Values.omsagent.env.clusterRegion | quote }}
        {{- end }}
+       {{- else if ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>" }}
+       - name: AKS_RESOURCE_ID
+         value: {{ .Values.Azure.Cluster.ResourceId | quote }}
+       {{- if ne .Values.Azure.Cluster.Region "<your_cluster_region>" }}
+       - name: AKS_REGION
+         value: {{ .Values.Azure.Cluster.Region | quote }}
+       {{- end }}
        {{- else }}
        - name: ACS_RESOURCE_NAME
          value: {{ .Values.omsagent.env.clusterName | quote }}
@@ -80,9 +89,6 @@ spec:
               - C:\opt\omsagentwindows\scripts\cmd\livenessProbe.cmd
           periodSeconds: 60
           initialDelaySeconds: 180
-   {{- with .Values.omsagent.daemonsetwindows.affinity }}
-   affinity: {{- toYaml . | nindent 8 }}
-   {{- end }}
    {{- with .Values.omsagent.tolerations }}
    tolerations: {{- toYaml . | nindent 8 }}
    {{- end }}
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
index d6d6171cd..7514247a0 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
@@ -1,4 +1,4 @@
-{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>"))}}
+{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>") (ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>") )}}
 apiVersion: apps/v1
 kind: DaemonSet
 metadata:
@@ -46,6 +46,13 @@ spec:
        - name: AKS_REGION
          value: {{ .Values.omsagent.env.clusterRegion | quote }}
        {{- end }}
+       {{- else if ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>" }}
+       - name: AKS_RESOURCE_ID
+         value: {{ .Values.Azure.Cluster.ResourceId | quote }}
+       {{- if ne .Values.Azure.Cluster.Region "<your_cluster_region>" }}
+       - name: AKS_REGION
+         value: {{ .Values.Azure.Cluster.Region | quote }}
+       {{- end }}
        {{- else }}
        - name: ACS_RESOURCE_NAME
          value: {{ .Values.omsagent.env.clusterName | quote }}
diff --git a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml
index 6f8140eb6..7d7ac7040 100644
--- a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml
@@ -1,4 +1,4 @@
-{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>"))}}
+{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>") (ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>") )}}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -47,6 +47,13 @@ spec:
        - name: AKS_REGION
          value: {{ .Values.omsagent.env.clusterRegion | quote }}
        {{- end }}
+       {{- else if ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>" }}
+       - name: AKS_RESOURCE_ID
+         value: {{ .Values.Azure.Cluster.ResourceId | quote }}
+       {{- if ne .Values.Azure.Cluster.Region "<your_cluster_region>" }}
+       - name: AKS_REGION
+         value: {{ .Values.Azure.Cluster.Region | quote }}
+       {{- end }}
        {{- else }}
        - name: ACS_RESOURCE_NAME
          value: {{ .Values.omsagent.env.clusterName | quote }}
diff --git a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
index c77fb12b4..ee0664495 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
@@ -1,4 +1,4 @@
-{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>"))}}
+{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>") (ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>") )}}
 kind: ConfigMap
 apiVersion: v1
 data:
diff --git a/charts/azuremonitor-containers/templates/omsagent-secret.yaml b/charts/azuremonitor-containers/templates/omsagent-secret.yaml
index c6d992b82..1a7f087ed 100644
--- a/charts/azuremonitor-containers/templates/omsagent-secret.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-secret.yaml
@@ -1,4 +1,4 @@
-{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>"))}}
+{{- if and (ne .Values.omsagent.secret.key "<your_workspace_key>") (ne .Values.omsagent.secret.wsid "<your_workspace_id>") (or (ne .Values.omsagent.env.clusterName "<your_cluster_name>") (ne .Values.omsagent.env.clusterId "<your_cluster_id>") (ne .Values.Azure.Cluster.ResourceId "<your_cluster_id>") )}}
 apiVersion: v1
 kind: Secret
 metadata:
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 610e109ef..4d0d7f8f2 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -3,7 +3,12 @@
 # Declare variables to be passed into your templates.
 
 ## Microsoft OMS Agent image for kubernetes cluster monitoring
-## ref: https://github.com/Microsoft/OMS-docker/tree/ci_feature_prod
+## ref: https://github.com/microsoft/Docker-Provider/tree/ci_prod
+## Values of ResourceId and Region under Azure->Cluster are populated by Azure Arc K8s RP during the installation of the extension
+Azure:
+  Cluster:
+    Region: <your_cluster_region>
+    ResourceId: <your_cluster_id>
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
@@ -81,17 +86,6 @@ omsagent:
                   operator: NotIn
                   values:
                     - master
-  daemonsetwindows:
-    affinity:
-      nodeAffinity:
-        requiredDuringSchedulingIgnoredDuringExecution:
-          nodeSelectorTerms:
-            - labelSelector:
-              matchExpressions:
-                - key: beta.kubernetes.io/os
-                  operator: In
-                  values:
-                    - windows
   ## Configure resource requests and limits
   ## ref: http://kubernetes.io/docs/user-guide/compute-resources/
   ##
diff --git a/img/azuremonitor-containers.svg b/img/azuremonitor-containers.svg
new file mode 100644
index 000000000..b2f7c5323
--- /dev/null
+++ b/img/azuremonitor-containers.svg
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Generator: Adobe Illustrator 22.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
+<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
+	 width="50px" height="50px" viewBox="0 0 50 50" enable-background="new 0 0 50 50" xml:space="preserve">
+<g>
+	<polygon fill="#804998" points="21.085,35.649 21.085,23.254 32.56,16.933 44.036,23.254 44.036,35.649 32.56,41.97 	"/>
+	<g opacity="0.6">
+		<path fill="#804998" d="M32.56,19.216l9.475,5.22v10.031l-9.475,5.22l-9.475-5.22V24.436L32.56,19.216 M32.56,16.933
+			l-11.475,6.321v12.394L32.56,41.97l11.475-6.321V23.254L32.56,16.933L32.56,16.933z"/>
+	</g>
+</g>
+<g opacity="0.4">
+	<polygon fill="#FFFFFF" points="23.085,34.467 23.085,24.436 32.561,19.216 42.036,24.436 42.036,34.467 32.561,39.687 	"/>
+</g>
+<g>
+	<polygon fill="#804998" points="0.036,35.649 0.036,23.254 11.511,16.933 22.987,23.254 22.987,35.649 11.511,41.97 	"/>
+	<g opacity="0.6">
+		<path fill="#804998" d="M11.511,19.216l9.475,5.22v10.031l-9.475,5.22l-9.475-5.22V24.436L11.511,19.216 M11.511,16.933
+			L0.036,23.254v12.394l11.475,6.321l11.475-6.321V23.254L11.511,16.933L11.511,16.933z"/>
+	</g>
+</g>
+<g opacity="0.4">
+	<polygon fill="#FFFFFF" points="2.036,34.467 2.036,24.436 11.511,19.216 20.986,24.436 20.986,34.467 11.511,39.687 	"/>
+</g>
+<g>
+	<polygon fill="#804998" points="11.019,18.876 11.019,6.978 22.036,0.909 33.052,6.978 33.052,18.876 22.036,24.945 	"/>
+	<g opacity="0.6">
+		<path fill="#804998" d="M22.036,3.192l9.016,4.967v9.535l-9.016,4.967l-9.016-4.967V8.159L22.036,3.192 M22.036,0.909
+			L11.019,6.978v11.899l11.016,6.069l11.016-6.069V6.978L22.036,0.909L22.036,0.909z"/>
+	</g>
+</g>
+<g opacity="0.4">
+	<polygon fill="#FFFFFF" points="13.02,17.695 13.02,8.159 22.036,3.192 31.052,8.159 31.052,17.695 22.036,22.662 	"/>
+</g>
+<g opacity="0.4">
+	<path fill="#804998" d="M14.916,31.908c0.923-1.599,0.747-3.534-0.289-4.931l7.012-10.215c0.249,0.022,0.497,0.032,0.744,0.012
+		l6.492,10.138c-0.137,0.172-0.275,0.343-0.388,0.539c-1.24,2.148-0.501,4.905,1.646,6.145c2.148,1.24,4.905,0.501,6.145-1.646
+		c1.24-2.148,0.501-4.905-1.646-6.145c-0.787-0.454-1.655-0.637-2.5-0.589l-6.531-10.213c0.114-0.15,0.235-0.292,0.331-0.459
+		c1.24-2.148,0.501-4.905-1.646-6.145s-4.905-0.501-6.145,1.646c-0.923,1.599-0.747,3.534,0.289,4.931l-7.012,10.215
+		c-1.688-0.147-3.391,0.657-4.293,2.219c-1.24,2.148-0.501,4.905,1.646,6.145C10.919,34.795,13.676,34.056,14.916,31.908z"/>
+</g>
+<circle fill="#FFFFFF" cx="22.036" cy="12.294" r="3.58"/>
+<circle fill="#FFFFFF" cx="11.021" cy="29.659" r="3.58"/>
+<circle fill="#FFFFFF" cx="32.382" cy="29.699" r="3.58"/>
+<polygon fill="#FFFFFF" points="21.36,12.796 22.991,11.953 33.198,29.278 31.567,30.121 "/>
+<polygon fill="#FFFFFF" points="10.179,29.173 21.28,11.875 22.87,12.793 11.769,30.091 "/>
+<g>
+	<g opacity="0.4">
+		<path fill="#FFFFFF" d="M44.036,35.649V25.79C42.021,24.662,39.701,24,37.247,24H25v12.247c0,0.547,0.042,1.084,0.105,1.616
+			l7.456,4.107L44.036,35.649z"/>
+	</g>
+	<g>
+		<path fill="#0072C6" d="M37.247,25H26v11.247c0,6.6,5.35,12.653,11.95,12.653c6.6,0,11.95-5.35,11.95-11.95
+			C49.9,30.35,43.847,25,37.247,25z"/>
+	</g>
+	<g>
+		<rect x="37" y="36" fill="#FFFFFF" width="2" height="5"/>
+	</g>
+	<g>
+		<rect x="32" y="34" fill="#FFFFFF" width="2" height="7"/>
+	</g>
+	<g>
+		<rect x="42" y="31" fill="#FFFFFF" width="2" height="10"/>
+	</g>
+</g>
+</svg>
diff --git a/kubernetes/linux/acrworkflows/acrdevnamespace.yaml b/kubernetes/linux/acrworkflows/acrdevnamespace.yaml
index 9270be755..6a3617f6b 100644
--- a/kubernetes/linux/acrworkflows/acrdevnamespace.yaml
+++ b/kubernetes/linux/acrworkflows/acrdevnamespace.yaml
@@ -1,5 +1,5 @@
 version: 1.0-preview-1
 steps:
   - build: -t {{.Run.Registry}}/public/azuremonitor/containerinsights/cidev:{{.Run.Branch}}-{{.Run.Date}}-{{.Run.Commit | substr 0 7 }} .
-    workingDirectory: ci_feature
-  - push: ["{{.Run.Registry}}/public/azuremonitor/containerinsights/cidev:{{.Run.Branch}}-{{.Run.Date}}-{{.Run.Commit | substr 0 7 }}"] 
+    workingDirectory: ci_dev
+  - push: ["{{.Run.Registry}}/public/azuremonitor/containerinsights/cidev:{{.Run.Branch}}-{{.Run.Date}}-{{.Run.Commit | substr 0 7 }}"]
diff --git a/scripts/onboarding/add-monitoring-metrics-publisher-role.md b/scripts/onboarding/add-monitoring-metrics-publisher-role.md
index 822ff0f64..91b91d872 100644
--- a/scripts/onboarding/add-monitoring-metrics-publisher-role.md
+++ b/scripts/onboarding/add-monitoring-metrics-publisher-role.md
@@ -16,7 +16,7 @@ Of the built-in roles, only Owner and User Access Administrator are granted acce
 ### For single AKS cluster using Azure CLI
 
 ``` sh
-curl -sL https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/aks/mdmonboarding/mdm_onboarding.sh | bash -s <subscriptionId> <clusterResourceGroup> <clusterName>
+curl -sL https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.sh | bash -s <subscriptionId> <clusterResourceGroup> <clusterName>
 ```
 
 The configuration change can take a few minutes to complete. When it finishes, you see a message similar to the following that includes the result:
@@ -28,7 +28,7 @@ completed the role assignment
 ### For all AKS clusters in the specified subscription using Azure CLI
 
 ``` sh
-curl -sL https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/aks/mdmonboarding/mdm_onboarding_atscale.sh | bash -s <subscriptionId>
+curl -sL https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.sh | bash -s <subscriptionId>
 ```
 
 The configuration change can take a few minutes to complete. When it finishes, you see a message similar to the following that includes the result:
@@ -43,7 +43,7 @@ completed role assignments for all AKS clusters in subscription: <subscriptionId
 
 Get the below powershell script files to your local computer.
 
-- Powershell script file [mdm_onboarding.ps1](https://github.com/Microsoft/OMS-docker/blob/ci_feature/docs/aks/mdmonboarding/mdm_onboarding.ps1)
+- Powershell script file [mdm_onboarding.ps1](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.ps1)
 - Execute the mdm_onboarding.ps1 by passing the SubscriptionId, ResourceGroupName and clusterName of the AKS cluster
 
 ``` sh
@@ -60,7 +60,7 @@ Successfully added Monitoring Metrics Publisher role assignment to cluster : <ak
 
 Get the below powershell script files to your local computer.
 
-- Powershell script file [mdm_onboarding_atscale.ps1](https://github.com/Microsoft/OMS-docker/blob/ci_feature/docs/aks/mdmonboarding/mdm_onboarding_atscale.ps1)
+- Powershell script file [mdm_onboarding_atscale.ps1](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.ps1)
 - Execute the mdm_onboarding_atscale.ps1 by passing the SubscriptionId where are the AKS clusters in.
 
 ``` sh
diff --git a/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.sh b/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.sh
index 4ee7a3c91..3114b9845 100644
--- a/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.sh
+++ b/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.sh
@@ -15,7 +15,7 @@
 #
 # For example:
 #
-# https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/aks/mdmonboarding/mdm_onboarding.sh | bash -s "00000000-0000-0000-0000-000000000000" "MyAKSClusterRG" "MyAKSCluster"
+# https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aks/mdmonboarding/mdm_onboarding.sh | bash -s "00000000-0000-0000-0000-000000000000" "MyAKSClusterRG" "MyAKSCluster"
 #
 
 echo "subscriptionId"= ${1}
diff --git a/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.sh b/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.sh
index dd180a9eb..bcae16b20 100644
--- a/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.sh
+++ b/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.sh
@@ -15,7 +15,7 @@
 #
 #  For example:
 #
-#  https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/aks/mdmonboarding/mdm_onboarding_atscale.sh | bash -s "00000000-0000-0000-0000-000000000000"
+#  https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aks/mdmonboarding/mdm_onboarding_atscale.sh | bash -s "00000000-0000-0000-0000-000000000000"
 #
 
 echo "subscriptionId"= ${1}
diff --git a/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh b/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh
index 7f25d86ef..ad6310a28 100644
--- a/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh
+++ b/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh
@@ -14,7 +14,7 @@
 #  [Required]  ${5} clusterName                       Name of the cluster configured on the omsAgent (for omsagent.env.clusterName) of specified acs-engine Kubernetes cluster
 #
 #  For example
-# https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature/docs/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s "name of the cloud" "00000000-0000-0000-0000-000000000000"  "Resource Group Name of AKS-Engine cluster"  "/subscriptions/<SubscriptionId>/resourceGroups/<resourceGroup>/providers/Microsoft.OperationalInsights/workspaces/<workspaceName>" "clusterName of AKS-Engine cluster"
+# https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s "name of the cloud" "00000000-0000-0000-0000-000000000000"  "Resource Group Name of AKS-Engine cluster"  "/subscriptions/<SubscriptionId>/resourceGroups/<resourceGroup>/providers/Microsoft.OperationalInsights/workspaces/<workspaceName>" "clusterName of AKS-Engine cluster"
 #
 
 nameoftheCloud=${1}
diff --git a/scripts/onboarding/attach-monitoring-tags.md b/scripts/onboarding/attach-monitoring-tags.md
index f1c9a2e32..f7a802750 100644
--- a/scripts/onboarding/attach-monitoring-tags.md
+++ b/scripts/onboarding/attach-monitoring-tags.md
@@ -10,7 +10,7 @@ If you are not familiar with the concepts of azure resource tags (https://docs.m
 ## Attach tags using Powershell
 
 Get the below powershell script files to your local computer.
-   - Powershell script file [AddMonitoringWorkspaceTags.ps1](https://github.com/Microsoft/OMS-docker/blob/ci_feature/docs/aksengine/kubernetes/AddMonitoringWorkspaceTags.ps1)
+   - Powershell script file [AddMonitoringWorkspaceTags.ps1](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aksengine/kubernetes/AddMonitoringWorkspaceTags.ps1)
    - Refer for updating the Powershell execution policy (https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.security/set-executionpolicy?view=powershell-6)
    - Log analytics workspace resource Id can retrieved either Azure CLI or Powershell or Azure Portal
       Azure CLI
@@ -50,14 +50,14 @@ The configuration change can take a few minutes to complete. When it finishes, y
 
 ``` sh
 
-curl -sL https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature/docs/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s <nameoftheCloud> <subscriptionId> <clusterResourceGroup> <logAnalyticsWorkspaceResourceId> <clusterName>
+curl -sL https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s <nameoftheCloud> <subscriptionId> <clusterResourceGroup> <logAnalyticsWorkspaceResourceId> <clusterName>
 
 Example for AKS-Engine clusters in Azure Public cloud
 
-curl -sL https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature/docs/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s "AzureCloud" "00000000-0000-0000-0000-000000000000"  "my-aks-engine-cluster-rg"  "/subscriptions/<SubscriptionId>/resourceGroups/workspaceRg/providers/Microsoft.OperationalInsights/workspaces/workspaceName" "my-aks-engine-cluster"
+curl -sL https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s "AzureCloud" "00000000-0000-0000-0000-000000000000"  "my-aks-engine-cluster-rg"  "/subscriptions/<SubscriptionId>/resourceGroups/workspaceRg/providers/Microsoft.OperationalInsights/workspaces/workspaceName" "my-aks-engine-cluster"
 
 Example for AKS-Engine clusters in Azure China cloud
 
-curl -sL https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature/docs/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s "AzureChinaCloud" "00000000-0000-0000-0000-000000000000"  "my-aks-engine-cluster-rg"  "/subscriptions/<SubscriptionId>/resourceGroups/workspaceRg/providers/Microsoft.OperationalInsights/workspaces/workspaceName" "my-aks-engine-cluster"
+curl -sL https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/aksengine/kubernetes/AddMonitoringOnboardingTags.sh | bash -s "AzureChinaCloud" "00000000-0000-0000-0000-000000000000"  "my-aks-engine-cluster-rg"  "/subscriptions/<SubscriptionId>/resourceGroups/workspaceRg/providers/Microsoft.OperationalInsights/workspaces/workspaceName" "my-aks-engine-cluster"
 
 ```
diff --git a/scripts/onboarding/hybrid/onboarding_azuremonitor_for_containers.sh b/scripts/onboarding/hybrid/onboarding_azuremonitor_for_containers.sh
index b66dca67d..e2afa579d 100644
--- a/scripts/onboarding/hybrid/onboarding_azuremonitor_for_containers.sh
+++ b/scripts/onboarding/hybrid/onboarding_azuremonitor_for_containers.sh
@@ -151,7 +151,7 @@ echo "workspaceResourceId:"$workspaceResourceId
 echo "workspaceGuid:"$workspaceGuid
 
 echo "adding containerinsights solution to workspace"
-solution=$(az group deployment create -g $defaultWorkspaceResourceGroup --template-uri https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature_prod/docs/templates/azuremonitor-containerSolution.json --parameters workspaceResourceId=$workspaceResourceId --parameters workspaceRegion=$workspaceRegion)
+solution=$(az group deployment create -g $defaultWorkspaceResourceGroup --template-uri https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/templates/azuremonitor-containerSolution.json --parameters workspaceResourceId=$workspaceResourceId --parameters workspaceRegion=$workspaceRegion)
 
 echo "getting workspace primaryshared key"
 workspaceKey=$(az rest --method post --uri $workspaceResourceId/sharedKeys?api-version=2015-11-01-preview --query primarySharedKey)
diff --git a/scripts/onboarding/managed/disable-monitoring.ps1 b/scripts/onboarding/managed/disable-monitoring.ps1
index 41ba2adb0..ea66cb3a3 100644
--- a/scripts/onboarding/managed/disable-monitoring.ps1
+++ b/scripts/onboarding/managed/disable-monitoring.ps1
@@ -7,6 +7,12 @@
 
     .PARAMETER clusterResourceId
         Id of the Azure Managed Cluster such as Azure ARC K8s, ARO v4 etc.
+    .PARAMETER servicePrincipalClientId
+        client Id of the service principal which will be used for the azure login
+    .PARAMETER servicePrincipalClientSecret
+        client secret of the service principal which will be used for the azure login
+    .PARAMETER tenantId
+        tenantId of the service principal which will be used for the azure login
     .PARAMETER kubeContext (optional)
         kube-context of the k8 cluster to install Azure Monitor for containers HELM chart
 
@@ -22,6 +28,11 @@
 param(
     [Parameter(mandatory = $true)]
     [string]$clusterResourceId,
+    [string]$servicePrincipalClientId,
+    [Parameter(mandatory = $false)]
+    [string]$servicePrincipalClientSecret,
+    [Parameter(mandatory = $false)]
+    [string]$tenantId,
     [Parameter(mandatory = $false)]
     [string]$kubeContext
 )
@@ -33,6 +44,7 @@ $helmChartName = "azuremonitor-containers"
 $isArcK8sCluster = $false
 $isAksCluster =  $false
 $isAroV4Cluster = $false
+$isUsingServicePrincipal = $false
 
 # checks the required Powershell modules exist and if not exists, request the user permission to install
 $azAccountModule = Get-Module -ListAvailable -Name Az.Accounts
@@ -199,11 +211,24 @@ if ($clusterResourceId.ToLower().Contains("microsoft.kubernetes/connectedcluster
    $isAroV4Cluster = $true
 }
 
+if(([string]::IsNullOrEmpty($servicePrincipalClientId) -eq $false) -and
+   ([string]::IsNullOrEmpty($servicePrincipalClientSecret) -eq $false) -and
+   ([string]::IsNullOrEmpty($tenantId) -eq $false)) {
+    Write-Host("Using service principal creds for the azure login since provided.")
+    $isUsingServicePrincipal = $true
+ }
+
 $resourceParts = $clusterResourceId.Split("/")
 $clusterSubscriptionId = $resourceParts[2]
 
 Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -ForegroundColor Green
 
+if ($isUsingServicePrincipal) {  # non-interactive login with the supplied service principal
+    $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
+    $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
+    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId  # -ServicePrincipal: creds are app id/secret, not a user account
+}
+
 try {
     Write-Host("")
     Write-Host("Trying to get the current Az login context...")
@@ -220,8 +245,15 @@ catch {
 
 if ($null -eq $account.Account) {
     try {
-        Write-Host("Please login...")
-        Connect-AzAccount -subscriptionid $clusterSubscriptionId
+
+        if ($isUsingServicePrincipal) {  # no Az context found: log in with the service principal
+            $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
+            $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
+            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId  # -ServicePrincipal: creds are app id/secret, not a user account
+        } else {  # otherwise fall back to the interactive login
+            Write-Host("Please login...")
+            Connect-AzAccount -subscriptionid $clusterSubscriptionId
+        }
     }
     catch {
         Write-Host("")
diff --git a/scripts/onboarding/managed/disable-monitoring.sh b/scripts/onboarding/managed/disable-monitoring.sh
index f55b4e617..f20bd7d33 100644
--- a/scripts/onboarding/managed/disable-monitoring.sh
+++ b/scripts/onboarding/managed/disable-monitoring.sh
@@ -14,7 +14,10 @@
 # 1. disable monitoring using current kube-context
 # bash disable_monitoring.sh --resource-id/-r <clusterResourceId>
 
-# 2. disable monitoring using specific kube-context
+# 2. disable monitoring using current kube-context and service principal creds for the azure login
+# bash disable_monitoring.sh --resource-id <clusterResourceId> --client-id <sp client id> --client-secret <sp client secret> --tenant-id <tenant id of the service principal>
+
+# 3. disable monitoring using specific kube-context
 # bash disable_monitoring.sh --resource-id/-r <clusterResourceId> --kube-context/-k <kube-context>
 
 
@@ -48,12 +51,18 @@ isAroV4Cluster=false
 clusterResourceId=""
 kubeconfigContext=""
 
+# sp details for the login if provided
+servicePrincipalClientId=""
+servicePrincipalClientSecret=""
+servicePrincipalTenantId=""
+isUsingServicePrincipal=false
+
 usage()
 {
     local basename=`basename $0`
     echo
     echo "Disable Azure Monitor for containers:"
-    echo "$basename --resource-id/-r <cluster resource id> [--kube-context/-k <name of the kube context >]"
+    echo "$basename --resource-id/-r <cluster resource id> [--client-id <clientId of service principal>] [--client-secret <client secret of service principal>] [--tenant-id <tenant id of the service principal>] [--kube-context/-k <name of the kube context >]"
 }
 
 delete_helm_release()
@@ -105,8 +114,13 @@ remove_monitoring_tags()
 {
   echo "deleting monitoring tags ..."
 
-  echo "login to the azure interactively"
-  az login --use-device-code
+  if [ "$isUsingServicePrincipal" = true ] ; then  # non-interactive login when sp creds were passed in
+     echo "login to the azure using provided service principal creds"
+     az login --service-principal --username "$servicePrincipalClientId" --password="$servicePrincipalClientSecret" --tenant "$servicePrincipalTenantId"  # quoted: no word-splitting/globbing; --password= form tolerates a secret starting with '-'
+  else
+     echo "login to the azure interactively"
+     az login --use-device-code
+  fi
 
   echo "set the cluster subscription id: ${clusterSubscriptionId}"
   az account set -s ${clusterSubscriptionId}
@@ -159,6 +173,9 @@ for arg in "$@"; do
   case "$arg" in
     "--resource-id") set -- "$@" "-r" ;;
     "--kube-context") set -- "$@" "-k" ;;
+    "--client-id") set -- "$@" "-c" ;;
+    "--client-secret") set -- "$@" "-s" ;;
+    "--tenant-id") set -- "$@" "-t" ;;
     "--help")   set -- "$@" "-h" ;;
     "--"*)   usage ;;
     *)        set -- "$@" "$arg"
@@ -167,7 +184,7 @@ done
 
  local OPTIND opt
 
- while getopts 'hk:r:' opt; do
+ while getopts 'hk:c:s:t:r:' opt; do
     case "$opt" in
       h)
       usage
@@ -183,6 +200,21 @@ done
         echo "clusterResourceId is $OPTARG"
         ;;
 
+      c)
+        servicePrincipalClientId="$OPTARG"
+        echo "servicePrincipalClientId is $OPTARG"
+        ;;
+
+      s)
+        servicePrincipalClientSecret="$OPTARG"
+        echo "clientSecret is *****"
+        ;;
+
+      t)
+        servicePrincipalTenantId="$OPTARG"
+        echo "service principal tenantId is $OPTARG"
+        ;;
+
       ?)
         usage
         exit 1
@@ -241,6 +273,11 @@ done
    exit 1
  fi
 
+ if [ ! -z "$servicePrincipalClientId" -a  ! -z "$servicePrincipalClientSecret"  -a  ! -z "$servicePrincipalTenantId" ]; then
+   echo "using service principal creds (clientId, secret and tenantId) for azure login since provided"
+   isUsingServicePrincipal=true
+ fi
+
 }
 
 
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index 346cdc81a..b734ba347 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -9,6 +9,12 @@
 
     .PARAMETER clusterResourceId
         Id of the Azure Managed Cluster such as Azure ARC K8s, ARO v4 etc.
+    .PARAMETER servicePrincipalClientId
+        Client Id of the service principal which will be used for the azure login
+    .PARAMETER servicePrincipalClientSecret
+        Client secret of the service principal which will be used for the azure login
+    .PARAMETER tenantId
+        Azure TenantId of the service principal which will be used for the azure login
     .PARAMETER kubeContext (optional)
         kube-context of the k8 cluster to install Azure Monitor for containers HELM chart
     .PARAMETER workspaceResourceId (optional)
@@ -34,6 +40,12 @@ param(
     [Parameter(mandatory = $true)]
     [string]$clusterResourceId,
     [Parameter(mandatory = $false)]
+    [string]$servicePrincipalClientId,
+    [Parameter(mandatory = $false)]
+    [string]$servicePrincipalClientSecret,
+    [Parameter(mandatory = $false)]
+    [string]$tenantId,
+    [Parameter(mandatory = $false)]
     [string]$kubeContext,
     [Parameter(mandatory = $false)]
     [string]$workspaceResourceId,
@@ -53,6 +65,7 @@ $helmChartRepoUrl = "https://kubernetes-charts-incubator.storage.googleapis.com/
 # flags to indicate the cluster types
 $isArcK8sCluster = $false
 $isAksCluster =  $false
+$isUsingServicePrincipal = $false
 
 if([string]::IsNullOrEmpty($helmRepoName) -eq $false){
     $helmChartRepoName = $helmRepoName
@@ -220,6 +233,13 @@ if (($clusterResourceId.ToLower().Contains("microsoft.kubernetes/connectedcluste
     exit
 }
 
+if(([string]::IsNullOrEmpty($servicePrincipalClientId) -eq $false) -and
+   ([string]::IsNullOrEmpty($servicePrincipalClientSecret) -eq $false) -and
+   ([string]::IsNullOrEmpty($tenantId) -eq $false)) {
+   Write-Host("Using service principal creds for the azure login since these provided.")
+   $isUsingServicePrincipal = $true
+}
+
 if ($clusterResourceId.ToLower().Contains("microsoft.kubernetes/connectedclusters") -eq $true) {
    $isArcK8sCluster = $true
 } elseif ($clusterResourceId.ToLower().Contains("microsoft.containerservice/managedclusters") -eq $true) {
@@ -231,6 +251,12 @@ $clusterSubscriptionId = $resourceParts[2]
 
 Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -ForegroundColor Green
 
+if ($isUsingServicePrincipal) {  # non-interactive login with the supplied service principal
+    $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
+    $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
+    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId  # -ServicePrincipal: creds are app id/secret, not a user account
+}
+
 try {
     Write-Host("")
     Write-Host("Trying to get the current Az login context...")
@@ -247,8 +273,14 @@ catch {
 
 if ($null -eq $account.Account) {
     try {
-        Write-Host("Please login...")
-        Connect-AzAccount -subscriptionid $clusterSubscriptionId
+        if ($isUsingServicePrincipal) {  # no Az context found: log in with the service principal
+            $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
+            $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
+            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId  # -ServicePrincipal: creds are app id/secret, not a user account
+        } else {  # otherwise fall back to the interactive login
+            Write-Host("Please login...")
+            Connect-AzAccount -subscriptionid $clusterSubscriptionId
+        }
     }
     catch {
         Write-Host("")
@@ -498,7 +530,7 @@ try {
     helm repo add $helmChartRepoName $helmChartRepoUrl
     Write-Host("updating helm repo to get latest version of charts")
     helm repo update
-    $helmParameters = "omsagent.secret.wsid=$workspaceGUID,omsagent.secret.key=$workspacePrimarySharedKey,omsagent.env.clusterId=$clusterResourceId"
+    $helmParameters = "omsagent.secret.wsid=$workspaceGUID,omsagent.secret.key=$workspacePrimarySharedKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion"
     if([string]::IsNullOrEmpty($proxyEndpoint) -eq $false) {
         Write-Host("using proxy endpoint since its provided")
         $helmParameters = $helmParameters + ",omsagent.proxy=$proxyEndpoint"
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index 5a8e7e040..17c075725 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -20,17 +20,19 @@
 # 1. Using Default Azure Log Analytics and no-proxy with current kube config context
 # bash enable-monitoring.sh --resource-id <clusterResourceId>
 
-# 2. Using Default Azure Log Analytics and no-proxy
+# 2. Using Default Azure Log Analytics and no-proxy with current kube config context, and using service principal creds for the azure login
+# bash enable-monitoring.sh --resource-id <clusterResourceId> --client-id <sp client id> --client-secret <sp client secret> --tenant-id <tenant id of the service principal>
+
+# 3. Using Default Azure Log Analytics and no-proxy
 # bash enable-monitoring.sh  --resource-id <clusterResourceId> --kube-context <kube-context>
 
-# 3. Using Default Azure Log Analytics and with proxy endpoint configuration
+# 4. Using Default Azure Log Analytics and with proxy endpoint configuration
 # bash enable-monitoring.sh  --resource-id <clusterResourceId> --kube-context <kube-context> --proxy <proxy-endpoint>
 
-
-# 4. Using Existing Azure Log Analytics and no-proxy
+# 5. Using Existing Azure Log Analytics and no-proxy
 # bash enable-monitoring.sh  --resource-id <clusterResourceId> --kube-context <kube-context> --workspace-id <workspace-resource-id>
 
-# 5. Using Existing Azure Log Analytics and proxy
+# 6. Using Existing Azure Log Analytics and proxy
 # bash enable-monitoring.sh  --resource-id <clusterResourceId> --kube-context <kube-context> --workspace-id <workspace-resource-id> --proxy <proxy-endpoint>
 
 set -e
@@ -95,12 +97,18 @@ workspaceResourceGroup="DefaultResourceGroup-"$workspaceRegionCode
 workspaceGuid=""
 workspaceKey=""
 
+# sp details for the login if provided
+servicePrincipalClientId=""
+servicePrincipalClientSecret=""
+servicePrincipalTenantId=""
+isUsingServicePrincipal=false
+
 usage()
 {
     local basename=`basename $0`
     echo
     echo "Enable Azure Monitor for containers:"
-    echo "$basename --resource-id <cluster resource id> [--kube-context <name of the kube context >] [--workspace-id <resource id of existing workspace>] [--proxy <proxy endpoint>]"
+    echo "$basename --resource-id <cluster resource id> [--client-id <clientId of service principal>] [--client-secret <client secret of service principal>] [--tenant-id <tenant id of the service principal>] [--kube-context <name of the kube context >] [--workspace-id <resource id of existing workspace>] [--proxy <proxy endpoint>]"
 }
 
 parse_args()
@@ -120,8 +128,12 @@ for arg in "$@"; do
     "--kube-context") set -- "$@" "-k" ;;
     "--workspace-id") set -- "$@" "-w" ;;
     "--proxy") set -- "$@" "-p" ;;
+    "--client-id") set -- "$@" "-c" ;;
+    "--client-secret") set -- "$@" "-s" ;;
+    "--tenant-id") set -- "$@" "-t" ;;
     "--helm-repo-name") set -- "$@" "-n" ;;
     "--helm-repo-url") set -- "$@" "-u" ;;
+    "--container-log-volume") set -- "$@" "-v" ;;
     "--"*)   usage ;;
     *)        set -- "$@" "$arg"
   esac
@@ -129,7 +141,7 @@ done
 
 local OPTIND opt
 
-while getopts 'hk:r:w:p:n:u:' opt; do
+while getopts 'hk:r:w:p:c:s:t:n:u:v:' opt; do
     case "$opt" in
       h)
       usage
@@ -155,6 +167,21 @@ while getopts 'hk:r:w:p:n:u:' opt; do
         echo "proxyEndpoint is $OPTARG"
         ;;
 
+      c)
+        servicePrincipalClientId="$OPTARG"
+        echo "servicePrincipalClientId is $OPTARG"
+        ;;
+
+      s)
+        servicePrincipalClientSecret="$OPTARG"
+        echo "clientSecret is *****"
+        ;;
+
+      t)
+        servicePrincipalTenantId="$OPTARG"
+        echo "service principal tenantId is $OPTARG"
+        ;;
+
       n)
         helmRepoName="$OPTARG"
         echo "helm repo name is $OPTARG"
@@ -277,6 +304,11 @@ if [ ! -z "$proxyEndpoint" ]; then
     fi
 fi
 
+if [ ! -z "$servicePrincipalClientId" -a  ! -z "$servicePrincipalClientSecret"  -a  ! -z "$servicePrincipalTenantId" ]; then
+   echo "using service principal creds (clientId, secret and tenantId) for azure login since provided"
+   isUsingServicePrincipal=true
+fi
+
 }
 
 configure_to_public_cloud()
@@ -309,7 +341,9 @@ create_default_log_analytics_workspace()
 
   # extract subscription from cluster resource id
   local subscriptionId="$(echo $clusterResourceId | cut -d'/' -f3)"
-  local clusterRegion=$(az resource show --ids ${clusterResourceId} --query location)
+  local clusterRegion=$(az resource show --ids ${clusterResourceId} --query location -o tsv)
+  # convert cluster region to lower case
+  clusterRegion=$(echo $clusterRegion | tr "[:upper:]" "[:lower:]")
   echo "cluster region:" $clusterRegion
 
   # mapping fors for default Azure Log Analytics workspace
@@ -464,6 +498,10 @@ install_helm_chart()
   echo "installing Azure Monitor for containers HELM chart on to the cluster with kubecontext:${kubeconfigContext} ..."
  fi
 
+ echo "getting the region of the cluster"
+ clusterRegion=$(az resource show --ids "${clusterResourceId}" --query location -o tsv)  # tsv: bare region name without JSON quotes, safe to pass to helm --set
+ echo "cluster region is : ${clusterRegion}"
+
  echo "adding helm repo:" $helmRepoName
  helm repo add $helmRepoName $helmRepoUrl
 
@@ -474,18 +512,18 @@ install_helm_chart()
    echo "using proxy endpoint since proxy configuration passed in"
    if [ -z "$kubeconfigContext" ]; then
      echo "using current kube-context since --kube-context/-k parameter not passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId $helmRepoName/$helmChartName
+     helm upgrade --install azmon-containers-release-1 --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName
    else
      echo "using --kube-context:${kubeconfigContext} since passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId $helmRepoName/$helmChartName --kube-context ${kubeconfigContext}
+     helm upgrade --install azmon-containers-release-1 --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName --kube-context ${kubeconfigContext}
    fi
  else
    if [ -z "$kubeconfigContext" ]; then
      echo "using current kube-context since --kube-context/-k parameter not passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId $helmRepoName/$helmChartName
+     helm upgrade --install azmon-containers-release-1 --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName
    else
      echo "using --kube-context:${kubeconfigContext} since passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId $helmRepoName/$helmChartName --kube-context ${kubeconfigContext}
+     helm upgrade --install azmon-containers-release-1 --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName --kube-context ${kubeconfigContext}
    fi
  fi
 
@@ -495,8 +533,13 @@ install_helm_chart()
 
 login_to_azure()
 {
-  echo "login to the azure interactively"
-  az login --use-device-code
+  if [ "$isUsingServicePrincipal" = true ] ; then  # non-interactive login when sp creds were passed in
+     echo "login to the azure using provided service principal creds"
+     az login --service-principal --username "$servicePrincipalClientId" --password="$servicePrincipalClientSecret" --tenant "$servicePrincipalTenantId"  # quoted: no word-splitting/globbing; --password= form tolerates a secret starting with '-'
+  else
+    echo "login to the azure interactively"
+    az login --use-device-code
+  fi
 }
 
 set_azure_subscription()
diff --git a/scripts/onboarding/solution-onboarding.md b/scripts/onboarding/solution-onboarding.md
index 045738762..13e76530d 100644
--- a/scripts/onboarding/solution-onboarding.md
+++ b/scripts/onboarding/solution-onboarding.md
@@ -6,8 +6,8 @@ You can either use the Azure Powershell or Azure cli to deploy the solution.
 If you are not familiar with the concepts of deploying resources using a template with PowerShell, see [Deploy resources with Resource Manager templates and Azure PowerShell](https://review.docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-template-deploy)
 
 1. Get the below template files to your local computer.
-   - Template file [azuremonitor-containerSolution.json](https://github.com/Microsoft/OMS-docker/blob/ci_feature_prod/docs/templates/azuremonitor-containerSolution.json)
-   - TemplateParams file [azuremonitor-containerSolutionParams.json](https://github.com/Microsoft/OMS-docker/blob/ci_feature_prod/docs/templates/azuremonitor-containerSolutionParams.json)
+   - Template file [azuremonitor-containerSolution.json](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/templates/azuremonitor-containerSolution.json)
+   - TemplateParams file [azuremonitor-containerSolutionParams.json](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/templates/azuremonitor-containerSolutionParams.json)
 2. Edit the TemplateParams file in your local computer.
    * workspaceResourceId parameter :
        - Replace `<SubscriptionId>` with Azure subscriptionID for your Workspace
diff --git a/scripts/preview/health/HealthAgentOnboarding.ps1 b/scripts/preview/health/HealthAgentOnboarding.ps1
index 881dd2549..9ce8eca74 100644
--- a/scripts/preview/health/HealthAgentOnboarding.ps1
+++ b/scripts/preview/health/HealthAgentOnboarding.ps1
@@ -339,7 +339,7 @@ if ($false -eq $isSolutionOnboarded) {
     try {
         New-AzResourceGroupDeployment -Name $DeploymentName `
             -ResourceGroupName $workspaceResourceGroupName `
-            -TemplateUri  https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/templates/azuremonitor-containerSolution.json `
+            -TemplateUri  https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/templates/azuremonitor-containerSolution.json `
             -TemplateParameterObject $Parameters -ErrorAction Stop`
 
 
diff --git a/scripts/troubleshoot/README.md b/scripts/troubleshoot/README.md
index d4e2e9cf4..5ffa07639 100644
--- a/scripts/troubleshoot/README.md
+++ b/scripts/troubleshoot/README.md
@@ -7,7 +7,7 @@ The table below summarizes known issues you may face while using Azure Monitor f
 | ---- | --- |
 | Error Message `No data for selected filters`  | It may take some time to establish monitoring data flow for newly created clusters. Please allow at least 10-15 minutes for data to appear for your cluster. |
 | Error Message `Error retrieving data` | While Azure Kubenetes Service cluster is setting up for health and performance monitoring, a connection is established between the cluster and Azure Log Analytics workspace. Log Analytics workspace is used to store all monitoring data for your cluster. This error may occurr when your Log Analytics workspace has been deleted or lost. Please check whether your Log Analytics workspace is available. To find your Log Analytics workspace go [here.](https://docs.microsoft.com/en-us/azure/log-analytics/log-analytics-manage-access) and your workspace is available. If the workspace is missing, you will need to re-onboard Container Health to your cluster. To re-onboard, you will need to [opt out](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/container-insights-optout) of monitoring for the cluster and [onboard](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/container-insights-enable-existing-clusters) again to Container Health. |
-| `Error retrieving data` after adding Container Health through az aks cli | When onboarding using az aks cli, very seldom, Container Health may not be properly onboarded. Please check whether the Container Insights Solution is onboarded. To do this, go to your [Log Analytics workspace](https://docs.microsoft.com/en-us/azure/log-analytics/log-analytics-manage-access) and see if Container Insights Solution is available by going to the "Solutions" tab under General. To resolve this issue, you will need to redeploy the Container Insights Solution. Please follow the instructions on [how to deploy Azure Monitor - container health solution to your Log Analytics workspace. ](https://github.com/Microsoft/OMS-docker/blob/ci_feature_prod/docs/solution-onboarding.md) |
+| `Error retrieving data` after adding Container Health through az aks cli | When onboarding using az aks cli, very seldom, Container Health may not be properly onboarded. Please check whether the Container Insights Solution is onboarded. To do this, go to your [Log Analytics workspace](https://docs.microsoft.com/en-us/azure/log-analytics/log-analytics-manage-access) and see if Container Insights Solution is available by going to the "Solutions" tab under General. To resolve this issue, you will need to redeploy the Container Insights Solution. Please follow the instructions on [how to deploy Azure Monitor - container health solution to your Log Analytics workspace. ](https://github.com/microsoft/Docker-Provider/blob/ci_prod/scripts/onboarding/solution-onboarding.md) |
 | Failed to `Enable fast alerting experience on basic metrics for this Azure Kubernetes Services cluster`  | The action is trying to grant the Monitoring Metrics Publisher role assignment on the cluster resource. The user initiating the process must have access to the **Microsoft.Authorization/roleAssignments/write** permission on the AKS cluster resource scope. Only members of the **Owner** and **User Access Administrator** built-in roles are granted access to this permission. If your security policies require assigning granular level permissions, we recommend you view [custom roles](https://docs.microsoft.com/en-us/azure/role-based-access-control/custom-roles) and assign it to the users who require it. |
 
 # Azure Red Hat OpenShift Service (ARO)
@@ -36,7 +36,7 @@ Prequisites:
 
 # AKS or ARO
 
-You can use the troubleshooting script provided [here](https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature_prod/Troubleshoot/TroubleshootError.ps1) to diagnose the problem.
+You can use the troubleshooting script provided [here](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/troubleshoot/TroubleshootError.ps1) to diagnose the problem.
 
 Steps:
 - Open powershell using the [cloudshell](https://docs.microsoft.com/en-us/azure/cloud-shell/overview) in the azure portal.
@@ -45,8 +45,8 @@ Steps:
  For Mac OS, refer [install-powershell-core-on-mac](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell-core-on-macos?view=powershell-6) how to install powershell
 - Make sure that you're using powershell (selected by default)
 - Run the following command to change home directory - `cd ~`
-- Run the following command to download the script - `curl -LO https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature/Troubleshoot/TroubleshootError.ps1`
- > Note: In some versions of Powershell above CURL command may not work in such cases, you can try  `curl https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature/Troubleshoot/TroubleshootError.ps1 -O TroubleshootError.ps1`
+- Run the following command to download the script - `curl -LO https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/troubleshoot/TroubleshootError.ps1`
+ > Note: In some versions of Powershell above CURL command may not work in such cases, you can try  `curl https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/troubleshoot/TroubleshootError.ps1 -O TroubleshootError.ps1`
 - Run the following command to execute the script - `./TroubleshootError.ps1 -ClusterResourceId <resourceIdoftheCluster>`
     > Note: For AKS, resourceIdoftheCluster should be in this format `/subscriptions/<subId>/resourceGroups/<rgName>/providers/Microsoft.ContainerService/managedClusters/<clusterName>`.For ARO, should be in this format `/subscriptions/<subId>/resourceGroups/<rgName>/providers/Microsoft.ContainerService/openShiftManagedClusters/<clusterName>`.
 - This script will generate a TroubleshootDump.txt which collects detailed information about container health onboarding.
@@ -54,10 +54,10 @@ Steps:
 
 # Aks-Engine Kubernetes
 
-You can use the troubleshooting script provided [here](https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature_prod/Troubleshoot/TroubleshootError_AcsEngine.ps1) to diagnose the problem.
+You can use the troubleshooting script provided [here](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/troubleshoot/TroubleshootError_AcsEngine.ps1) to diagnose the problem.
 
 Steps:
-- Download [TroubleshootError_AcsEngine.ps1](https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature_prod/Troubleshoot/TroubleshootError_AcsEngine.ps1), [ContainerInsightsSolution.json](https://raw.githubusercontent.com/microsoft/OMS-docker/ci_feature_prod/Troubleshoot/ContainerInsightsSolution.json)
+- Download [TroubleshootError_AcsEngine.ps1](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/troubleshoot/TroubleshootError_AcsEngine.ps1), [ContainerInsightsSolution.json](https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/troubleshoot/ContainerInsightsSolution.json)
 - Collect Subscription ID, Resource group name of the Aks-Engine Kubernetes cluster
 - Use the following command to run the script : `.\TroubleshootError_AcsEngine.ps1 -SubscriptionId <subId> -ResourceGroupName <rgName>`.
 This script will generate a TroubleshootDump.txt which collects detailed information about container health onboarding.
diff --git a/scripts/troubleshoot/TroubleshootError.ps1 b/scripts/troubleshoot/TroubleshootError.ps1
index 7f857caa3..754a43e74 100644
--- a/scripts/troubleshoot/TroubleshootError.ps1
+++ b/scripts/troubleshoot/TroubleshootError.ps1
@@ -671,7 +671,7 @@ else {
             try {
                 New-AzResourceGroupDeployment -Name $DeploymentName `
                     -ResourceGroupName $workspaceResourceGroupName `
-                    -TemplateUri  https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/templates/azuremonitor-containerSolution.json `
+                    -TemplateUri  https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/templates/azuremonitor-containerSolution.json `
                     -TemplateParameterObject $Parameters -ErrorAction Stop`
 
                 Write-Host("")
diff --git a/scripts/troubleshoot/TroubleshootError_nonAzureK8s.ps1 b/scripts/troubleshoot/TroubleshootError_nonAzureK8s.ps1
index c7509a940..14b080b23 100644
--- a/scripts/troubleshoot/TroubleshootError_nonAzureK8s.ps1
+++ b/scripts/troubleshoot/TroubleshootError_nonAzureK8s.ps1
@@ -345,7 +345,7 @@ else {
         try {
             New-AzResourceGroupDeployment -Name $DeploymentName `
                 -ResourceGroupName $defaultWorkspaceResourceGroup `
-                -TemplateUri  https://raw.githubusercontent.com/Microsoft/OMS-docker/ci_feature/docs/templates/azuremonitor-containerSolution.json `
+                -TemplateUri  https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/scripts/onboarding/templates/azuremonitor-containerSolution.json `
                 -TemplateParameterObject $Parameters -ErrorAction Stop`
 
             Write-Host("")

From 2d56087e528a145aeb06b5beb6a60092dfa41e15 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 14 Sep 2020 12:34:33 -0700
Subject: [PATCH 17/60] add missing serviceprincipal in ps scripts (#435)

---
 scripts/onboarding/managed/disable-monitoring.ps1 | 4 ++--
 scripts/onboarding/managed/enable-monitoring.ps1  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/onboarding/managed/disable-monitoring.ps1 b/scripts/onboarding/managed/disable-monitoring.ps1
index ea66cb3a3..8945f90b6 100644
--- a/scripts/onboarding/managed/disable-monitoring.ps1
+++ b/scripts/onboarding/managed/disable-monitoring.ps1
@@ -226,7 +226,7 @@ Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -Foregr
 if ($isUsingServicePrincipal) {
     $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
     $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
-    Connect-AzAccount -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
 }
 
 try {
@@ -249,7 +249,7 @@ if ($null -eq $account.Account) {
         if ($isUsingServicePrincipal) {
             $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
             $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
-            Connect-AzAccount -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
         } else {
            Write-Host("Please login...")
           Connect-AzAccount -subscriptionid $clusterSubscriptionId
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index b734ba347..338de6cbc 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -254,7 +254,7 @@ Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -Foregr
 if ($isUsingServicePrincipal) {
     $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
     $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
-    Connect-AzAccount -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
 }
 
 try {
@@ -276,7 +276,7 @@ if ($null -eq $account.Account) {
         if ($isUsingServicePrincipal) {
             $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
             $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
-            Connect-AzAccount -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
         } else {
          Write-Host("Please login...")
          Connect-AzAccount -subscriptionid $clusterSubscriptionId

From a28aaf025f91957f193121e66fbfb1c1f9d6abe4 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Tue, 15 Sep 2020 11:46:14 -0700
Subject: [PATCH 18/60] fix telemetry bug (#436)

---
 source/plugins/ruby/out_mdm.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index b28c17034..c4cc46dd7 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -272,7 +272,7 @@ def send_to_mdm(post_body)
           @last_telemetry_sent_time = Time.now
         end
       rescue Net::HTTPServerException => e
-        if !response.nil && !response.body.nil? #body will have actual error
+        if !response.nil? && !response.body.nil? #body will have actual error
           @log.info "Failed to Post Metrics to MDM : #{e} Response.body: #{response.body}"
         else
           @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}"

From 0062b32da17eece46f6e754c0f8a35ac57c75c92 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 16 Sep 2020 10:59:03 -0700
Subject: [PATCH 19/60] Gangams/readmeupdates non aks 09162020 (#437)

* changes for ciprod09162020 non-aks release

* fix script to handle cross sub scenario

* fix minor comment

* fix date in version file

* fix pr comments
---
 ReleaseNotes.md                                 | 12 +++++++++++-
 build/version                                   |  4 ++--
 charts/azuremonitor-containers/values.yaml      |  6 +++---
 kubernetes/linux/Dockerfile                     |  2 +-
 kubernetes/omsagent.yaml                        | 12 ++++++------
 kubernetes/windows/Dockerfile                   |  2 +-
 scripts/onboarding/managed/enable-monitoring.sh |  2 +-
 7 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 0f1d932a8..547d00573 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -11,6 +11,17 @@ additional questions or comments.
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
 
+### 09/16/2020 -
+> Note: This agent release is targeted ONLY at non-AKS clusters via the Azure Monitor for containers HELM chart update
+##### Version microsoft/oms:ciprod09162020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod09162020 (linux)
+##### Version microsoft/oms:win-ciprod09162020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod09162020 (windows)
+##### Code change log
+- Collection of Azure Network Policy Manager Basic and Advanced metrics
+- Add support in Windows Agent for Container log collection of CRI runtimes such as ContainerD
+- Alertable metrics support for Arc K8s clusters, at parity with AKS
+- Support for multiple container log mount paths when docker is updated through knode
+- Bug fix related to MDM telemetry
+
 ### 08/07/2020 -
 ##### Version microsoft/oms:ciprod08072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08072020 (linux)
 ##### Version microsoft/oms:win-ciprod08072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08072020 (windows)
@@ -26,7 +37,6 @@ Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate t
 - Add region check before sending alertable metrics to MDM
 - Telemetry fix for agent telemetry for sov. clouds
 
-
 ### 07/15/2020 -
 ##### Version microsoft/oms:ciprod07152020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07152020 (linux)
 ##### Version microsoft/oms:win-ciprod05262020-2 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod05262020-2 (windows)
diff --git a/build/version b/build/version
index f26973116..b53b0dcfb 100644
--- a/build/version
+++ b/build/version
@@ -5,8 +5,8 @@
 CONTAINER_BUILDVERSION_MAJOR=10
 CONTAINER_BUILDVERSION_MINOR=0
 CONTAINER_BUILDVERSION_PATCH=0
-CONTAINER_BUILDVERSION_BUILDNR=4
-CONTAINER_BUILDVERSION_DATE=20200805
+CONTAINER_BUILDVERSION_BUILDNR=5
+CONTAINER_BUILDVERSION_DATE=20200916
 CONTAINER_BUILDVERSION_STATUS=Developer_Build
 
 #-------------------------------- End of File -----------------------------------
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 4d0d7f8f2..9c48cf9fb 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -12,10 +12,10 @@ Azure:
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod08072020"
-    tagWindows: "win-ciprod08072020"
+    tag: "ciprod09162020"
+    tagWindows: "win-ciprod09162020"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "10.0.0-4"
+    dockerProviderVersion: "10.0.0-5"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index bc27a5384..ee35cd556 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod08072020
+ARG IMAGE_TAG=ciprod09162020
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 947620ebc..b71a95227 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -337,13 +337,13 @@ spec:
         tier: node
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-4"
+        dockerProviderVersion: "10.0.0-5"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08072020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod09162020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -493,13 +493,13 @@ spec:
         rsName: "omsagent-rs"
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-4"
+        dockerProviderVersion: "10.0.0-5"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod08072020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod09162020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -639,13 +639,13 @@ spec:
         tier: node-win
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-4"
+        dockerProviderVersion: "10.0.0-5"
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod08072020"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod09162020"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index 70a5f6045..ca89d1c80 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod08072020
+ARG IMAGE_TAG=win-ciprod09162020
 
 # Do not split this into multiple RUN!
 # Docker creates a layer for every RUN-Statement
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index 17c075725..4142dbf6c 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -620,7 +620,7 @@ add_container_insights_solution $workspaceResourceId
 # get workspace guid and key
 get_workspace_guid_and_key $workspaceResourceId
 
-if [ "$isClusterAndWorkspaceInSameSubscription" = true ] ; then
+if [ "$isClusterAndWorkspaceInSameSubscription" = false ] ; then
   echo "switch to cluster subscription id as active subscription for cli: ${clusterSubscriptionId}"
   set_azure_subscription $clusterSubscriptionId
 fi

From 1a7ef1cfbfe611e8d14218167c393a2becafc8f9 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 16 Sep 2020 14:53:21 -0700
Subject: [PATCH 20/60] Gangams/fix weird conflicts (#439)

* separate build yamls for ci_prod branch (#415) (#416)

* [Merge] dev to prod for ciprod08072020 release (#424)

* separate build yamls for ci_prod branch (#415)

* re-enable adx path (#420)

* Gangams/release changes (#419)

* updates related to release

* updates related to release

* fix the incorrect version

* fix pr feedback

* fix some typos in the release notes

* fix for zero filled metrics (#423)

* consolidate windows agent image docker files (#422)

* consolidate windows agent image docker files

* revert docker file consolidation

* revert readme updates

* merge back windows dockerfiles

* image tag update

Co-authored-by: Vishwanath <visnara@microsoft.com>
Co-authored-by: rashmichandrashekar <rashmy@microsoft.com>

Co-authored-by: Vishwanath <visnara@microsoft.com>
Co-authored-by: rashmichandrashekar <rashmy@microsoft.com>
---
 ReleaseNotes.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 547d00573..499c99f02 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -37,6 +37,7 @@ Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate t
 - Add region check before sending alertable metrics to MDM
 - Telemetry fix for agent telemetry for sov. clouds
 
+
 ### 07/15/2020 -
 ##### Version microsoft/oms:ciprod07152020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod07152020 (linux)
 ##### Version microsoft/oms:win-ciprod05262020-2 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod05262020-2 (windows)

From bf75bf04ac28f1462ea358ea4762610b0cf70553 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 21 Sep 2020 10:07:52 -0700
Subject: [PATCH 21/60] fix quote issue for the region (#441)

---
 scripts/onboarding/managed/enable-monitoring.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index 4142dbf6c..226fd978b 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -499,7 +499,7 @@ install_helm_chart()
  fi
 
  echo "getting the region of the cluster"
- clusterRegion=$(az resource show --ids ${clusterResourceId} --query location)
+ clusterRegion=$(az resource show --ids ${clusterResourceId} --query location -o tsv)
  echo "cluster region is : ${clusterRegion}"
 
  echo "adding helm repo:" $helmRepoName

From 6287724c89ae6e8d0ac74789e472c99fed28bb48 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Mon, 21 Sep 2020 14:16:21 -0700
Subject: [PATCH 22/60] fix cpucapacity/limit bug (#442)

---
 source/plugins/ruby/KubernetesApiClient.rb | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb
index 36dcdd8c6..073eb0417 100644
--- a/source/plugins/ruby/KubernetesApiClient.rb
+++ b/source/plugins/ruby/KubernetesApiClient.rb
@@ -719,6 +719,9 @@ def getMetricNumericValue(metricName, metricVal)
           if (metricValue.end_with?("m"))
             metricValue.chomp!("m")
             metricValue = Float(metricValue) * 1000.0 ** 2
+          elsif (metricValue.end_with?("k"))
+            metricValue.chomp!("k")
+            metricValue = Float(metricValue) * 1000.0
           else #assuming no units specified, it is cores that we are converting to nanocores (the below conversion will fail for other unsupported 'units')
             metricValue = Float(metricValue) * 1000.0 ** 3
           end

From bd30a47ecb9b6ea5867fbd9ceff4810d3b5d4431 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Wed, 23 Sep 2020 09:01:24 -0700
Subject: [PATCH 23/60] grwehner/pv-usage-metrics (#431)

- Send persistent volume usage and capacity metrics to LA for PVs with PVCs at the pod level; config to include or exclude kube-system namespace.
- Send PV usage percentage to MDM if over the configurable threshold.
- Add PV usage recommended alert template.
---
 .../PVUsagePercentage.json                    | 174 ++++++++++++++++++
 build/linux/installer/conf/container.conf     |   2 +-
 build/linux/installer/conf/kube.conf          |   2 +-
 .../installer/datafiles/base_container.data   |   1 +
 .../scripts/tomlparser-mdm-metrics-config.rb  |  32 +++-
 .../tomlparser-metric-collection-config.rb    |  71 +++++++
 kubernetes/container-azm-ms-agentconfig.yaml  |  15 ++
 kubernetes/linux/main.sh                      |   8 +
 kubernetes/omsagent.yaml                      |   2 +-
 .../plugins/ruby/CAdvisorMetricsAPIClient.rb  |  77 ++++++++
 source/plugins/ruby/MdmAlertTemplates.rb      |  32 ++++
 source/plugins/ruby/MdmMetricsGenerator.rb    |  36 ++++
 source/plugins/ruby/constants.rb              |  11 ++
 source/plugins/ruby/filter_cadvisor2mdm.rb    |  76 +++++++-
 source/plugins/ruby/in_cadvisor_perf.rb       |   1 +
 source/plugins/ruby/in_win_cadvisor_perf.rb   |   1 +
 16 files changed, 533 insertions(+), 8 deletions(-)
 create mode 100644 alerts/recommended_alerts_ARM/PVUsagePercentage.json
 create mode 100644 build/linux/installer/scripts/tomlparser-metric-collection-config.rb

diff --git a/alerts/recommended_alerts_ARM/PVUsagePercentage.json b/alerts/recommended_alerts_ARM/PVUsagePercentage.json
new file mode 100644
index 000000000..e6cdbee15
--- /dev/null
+++ b/alerts/recommended_alerts_ARM/PVUsagePercentage.json
@@ -0,0 +1,174 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
+  "contentVersion": "1.0.0.0",
+  "parameters": {
+      "alertName": {
+          "type": "string",
+          "minLength": 1,
+          "metadata": {
+              "description": "Name of the alert"
+          }
+      },
+      "alertDescription": {
+          "type": "string",
+          "defaultValue": "This is a metric alert",
+          "metadata": {
+              "description": "Description of alert"
+          }
+      },
+      "alertSeverity": {
+          "type": "int",
+          "defaultValue": 3,
+          "allowedValues": [
+              0,
+              1,
+              2,
+              3,
+              4
+          ],
+          "metadata": {
+              "description": "Severity of alert {0,1,2,3,4}"
+          }
+      },
+      "isEnabled": {
+          "type": "bool",
+          "defaultValue": true,
+          "metadata": {
+              "description": "Specifies whether the alert is enabled"
+          }
+      },
+      "clusterResourceId": {
+          "type": "string",
+          "minLength": 1,
+          "metadata": {
+              "description": "Full Resource ID of the kubernetes cluster emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/ResourceGroupName/providers/Microsoft.ContainerService/managedClusters/cluster-xyz"
+          }
+      },
+      "operator": {
+          "type": "string",
+          "defaultValue": "GreaterThan",
+          "allowedValues": [
+              "Equals",
+              "NotEquals",
+              "GreaterThan",
+              "GreaterThanOrEqual",
+              "LessThan",
+              "LessThanOrEqual"
+          ],
+          "metadata": {
+              "description": "Operator comparing the current value with the threshold value."
+          }
+      },
+      "threshold": {
+          "type": "int",
+          "defaultValue": 80,
+          "metadata": {
+              "description": "The threshold value at which the alert is activated."
+          },
+          "minValue": 1,
+          "maxValue": 100
+      },
+      "timeAggregation": {
+          "type": "string",
+          "defaultValue": "Average",
+          "allowedValues": [
+              "Average",
+              "Minimum",
+              "Maximum",
+              "Count"
+          ],
+          "metadata": {
+              "description": "How the data that is collected should be combined over time."
+          }
+      },
+      "windowSize": {
+          "type": "string",
+          "defaultValue": "PT5M",
+          "allowedValues": [
+              "PT1M",
+              "PT5M",
+              "PT15M",
+              "PT30M",
+              "PT1H",
+              "PT6H",
+              "PT12H",
+              "PT24H"
+          ],
+          "metadata": {
+              "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format."
+          }
+      },
+      "evaluationFrequency": {
+          "type": "string",
+          "defaultValue": "PT1M",
+          "allowedValues": [
+              "PT1M",
+              "PT5M",
+              "PT15M",
+              "PT30M",
+              "PT1H"
+          ],
+          "metadata": {
+              "description": "How often the metric alert is evaluated, represented in ISO 8601 duration format"
+          }
+      },
+      "actionGroupId": {
+          "type": "string",
+          "defaultValue": "",
+          "metadata": {
+              "description": "The ID of the action group that is triggered when the alert is activated or deactivated"
+          }
+      }
+  },
+  "variables": {},
+  "resources": [
+      {
+          "name": "[parameters('alertName')]",
+          "type": "Microsoft.Insights/metricAlerts",
+          "location": "global",
+          "apiVersion": "2018-03-01",
+          "tags": {},
+          "properties": {
+              "description": "[parameters('alertDescription')]",
+              "severity": "[parameters('alertSeverity')]",
+              "enabled": "[parameters('isEnabled')]",
+              "scopes": [
+                  "[parameters('clusterResourceId')]"
+              ],
+              "evaluationFrequency": "[parameters('evaluationFrequency')]",
+              "windowSize": "[parameters('windowSize')]",
+              "criteria": {
+                  "odata.type": "Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria",
+                  "allOf": [
+                      {
+                          "name": "1st criterion",
+                          "metricName": "pvUsageExceededPercentage",
+                          "metricNamespace": "Insights.Container/persistentvolumes",
+                          "dimensions": [
+                              {
+                                  "name": "kubernetesNamespace",
+                                  "operator": "Include",
+                                  "values": [
+                                      "*"
+                                  ]
+                              },
+                              {
+                                  "name": "podName",
+                                  "operator": "Include",
+                                  "values": [
+                                      "*"
+                                  ]
+                              }
+                          ],
+                          "operator": "[parameters('operator')]",
+                          "threshold": "[parameters('threshold')]",
+                          "timeAggregation": "[parameters('timeAggregation')]",
+                          "skipMetricValidation": true
+                      }
+                  ]
+              },
+              "actions": "[if(empty(parameters('actionGroupId')), json('null'), json(concat('[{\"actionGroupId\": \"',parameters('actionGroupId'),'\"}]')))]"
+          }
+      }
+  ]
+}
diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf
index f02ec0131..e55c62fbc 100644
--- a/build/linux/installer/conf/container.conf
+++ b/build/linux/installer/conf/container.conf
@@ -46,7 +46,7 @@
 <filter mdm.cadvisorperf**>
   type filter_cadvisor2mdm
   custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
-  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes
+  metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
   log_level info
 </filter>
 
diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf
index 9ada8425f..ba40b7a35 100644
--- a/build/linux/installer/conf/kube.conf
+++ b/build/linux/installer/conf/kube.conf
@@ -74,7 +74,7 @@
     <filter mdm.cadvisorperf**>
      type filter_cadvisor2mdm
      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
-     metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
+     metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
      log_level info
     </filter>
 
diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index 87b89b14c..ca2538b79 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -120,6 +120,7 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/livenessprobe.sh;                                          build/linux/installer/scripts/livenessprobe.sh;      755; root; root
 /opt/tomlparser-prom-customconfig.rb;                           build/linux/installer/scripts/tomlparser-prom-customconfig.rb;     755; root; root
 /opt/tomlparser-mdm-metrics-config.rb;                          build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb;     755; root; root
+/opt/tomlparser-metric-collection-config.rb;                    build/linux/installer/scripts/tomlparser-metric-collection-config.rb;     755; root; root
 
 /opt/tomlparser-health-config.rb;                               build/linux/installer/scripts/tomlparser-health-config.rb;     755; root; root
 /opt/tomlparser.rb;                                             build/common/installer/scripts/tomlparser.rb;     755; root; root
diff --git a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb b/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
index 1c01dd8c6..345c51633 100644
--- a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
+++ b/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
@@ -12,6 +12,7 @@
 @percentageCpuUsageThreshold = Constants::DEFAULT_MDM_CPU_UTILIZATION_THRESHOLD
 @percentageMemoryRssThreshold = Constants::DEFAULT_MDM_MEMORY_RSS_THRESHOLD
 @percentageMemoryWorkingSetThreshold = Constants::DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD
+@percentagePVUsageThreshold = Constants::DEFAULT_MDM_PV_UTILIZATION_THRESHOLD
 
 # Use parser to parse the configmap toml file to a ruby structure
 def parseConfigMap
@@ -35,7 +36,7 @@ def parseConfigMap
 # Use the ruby structure created after config parsing to set the right values to be used for MDM metric configuration settings
 def populateSettingValuesFromConfigMap(parsedConfig)
   if !parsedConfig.nil? && !parsedConfig[:alertable_metrics_configuration_settings].nil?
-    # Get mdm metrics config settings for resource utilization
+    # Get mdm metrics config settings for container resource utilization
     begin
       resourceUtilization = parsedConfig[:alertable_metrics_configuration_settings][:container_resource_utilization_thresholds]
       if !resourceUtilization.nil?
@@ -66,7 +67,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
           puts "config::Non floating point value or value not convertible to float specified for Memory Working Set threshold, using default "
           @percentageMemoryWorkingSetThreshold = Constants::DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD
         end
-        puts "config::Using config map settings for MDM metric configuration settings for resource utilization"
+        puts "config::Using config map settings for MDM metric configuration settings for container resource utilization"
       end
     rescue => errorStr
       ConfigParseErrorLogger.logError("Exception while reading config map settings for MDM metric configuration settings for resource utilization - #{errorStr}, using defaults, please check config map for errors")
@@ -74,6 +75,32 @@ def populateSettingValuesFromConfigMap(parsedConfig)
       @percentageMemoryRssThreshold = Constants::DEFAULT_MDM_MEMORY_RSS_THRESHOLD
       @percentageMemoryWorkingSetThreshold = Constants::DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD
     end
+
+    # Get mdm metrics config settings for PV utilization
+    begin
+      isUsingPVThresholdConfig = false
+      pvUtilizationThresholds = parsedConfig[:alertable_metrics_configuration_settings][:pv_utilization_thresholds]
+      if !pvUtilizationThresholds.nil?
+        pvUsageThreshold = pvUtilizationThresholds[:pv_usage_threshold_percentage]
+        if !pvUsageThreshold.nil?
+          # String#to_f never fails (non-numeric text becomes 0.0), so it cannot detect bad input.
+          # Kernel#Float raises instead; the rescue below then logs the error and restores the default.
+          @percentagePVUsageThreshold = Float(pvUsageThreshold)
+          isUsingPVThresholdConfig = true
+        end
+      end
+
+      if isUsingPVThresholdConfig
+        puts "config::Using config map settings for MDM metric configuration settings for PV utilization"
+      else
+        # Only reached when the PV section or the threshold key is absent from the config map
+        puts "config::PV usage threshold not specified in config map, using default"
+        @percentagePVUsageThreshold = Constants::DEFAULT_MDM_PV_UTILIZATION_THRESHOLD
+      end
+    rescue => errorStr
+      ConfigParseErrorLogger.logError("Exception while reading config map settings for MDM metric configuration settings for PV utilization - #{errorStr}, using defaults, please check config map for errors")
+      @percentagePVUsageThreshold = Constants::DEFAULT_MDM_PV_UTILIZATION_THRESHOLD
+    end
   end
 end
 
@@ -97,6 +124,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   file.write("export AZMON_ALERT_CONTAINER_CPU_THRESHOLD=#{@percentageCpuUsageThreshold}\n")
   file.write("export AZMON_ALERT_CONTAINER_MEMORY_RSS_THRESHOLD=#{@percentageMemoryRssThreshold}\n")
   file.write("export AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD=\"#{@percentageMemoryWorkingSetThreshold}\"\n")
+  file.write("export AZMON_ALERT_PV_USAGE_THRESHOLD=#{@percentagePVUsageThreshold}\n")
   # Close file after writing all MDM setting environment variables
   file.close
   puts "****************End MDM Metrics Config Processing********************"
diff --git a/build/linux/installer/scripts/tomlparser-metric-collection-config.rb b/build/linux/installer/scripts/tomlparser-metric-collection-config.rb
new file mode 100644
index 000000000..40d87b7f1
--- /dev/null
+++ b/build/linux/installer/scripts/tomlparser-metric-collection-config.rb
@@ -0,0 +1,71 @@
+#!/usr/local/bin/ruby
+# frozen_string_literal: true
+
+require_relative "tomlrb"
+require_relative "ConfigParseErrorLogger"
+require_relative "microsoft/omsagent/plugin/constants"
+
+@configMapMountPath = "/etc/config/settings/metric_collection_settings"
+@configVersion = ""
+@configSchemaVersion = ""
+
+# Setting default values which will be used in case they are not set in the configmap or if the configmap doesn't exist
+@collectPVKubeSystemMetrics = false
+
+# Use parser to parse the configmap toml file to a ruby structure
+def parseConfigMap
+  begin
+    # Nothing to parse unless the config map file is present at the mount path
+    if !File.file?(@configMapMountPath)
+      puts "config::configmap container-azm-ms-agentconfig for metric collection settings not mounted, using defaults"
+      return nil
+    end
+
+    puts "config::configmap container-azm-ms-agentconfig for metric collection settings mounted, parsing values"
+    settings = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
+    puts "config::Successfully parsed mounted config map"
+    return settings
+  rescue => errorStr
+    ConfigParseErrorLogger.logError("Exception while parsing config map for metric collection settings: #{errorStr}, using defaults, please check config map for errors")
+    return nil
+  end
+end
+
+# Use the ruby structure created after config parsing to set the right values to be used for metric collection settings
+def populateSettingValuesFromConfigMap(parsedConfig)
+  # Get the setting for including or excluding kube-system namespace in PV metrics; a missing section or key keeps the default
+  metricCollectionSettings = parsedConfig.nil? ? nil : parsedConfig[:metric_collection_settings]
+  pvKubeSystemSettings = metricCollectionSettings.nil? ? nil : metricCollectionSettings[:collect_kube_system_pv_metrics]
+  if !pvKubeSystemSettings.nil? && !pvKubeSystemSettings[:enabled].nil?
+    @collectPVKubeSystemMetrics = pvKubeSystemSettings[:enabled]
+    puts "config::Using config map setting for PV kube-system collection"
+  end
+rescue => errorStr
+  ConfigParseErrorLogger.logError("Exception while reading config map settings for PV kube-system collection - #{errorStr}, using defaults, please check config map for errors")
+end
+
+@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+puts "****************Start Metric Collection Settings Processing********************"
+if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it
+  configMapSettings = parseConfigMap
+  if !configMapSettings.nil?
+    populateSettingValuesFromConfigMap(configMapSettings)
+  end
+else
+  if (File.file?(@configMapMountPath))
+    ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
+  end
+end
+
+# Write the settings to file, so that they can be set as environment variables.
+# File.open raises on failure instead of returning nil, so failures are handled with rescue;
+# the block form closes the file once the settings have been written.
+begin
+  File.open("config_metric_collection_env_var", "w") do |file|
+    file.write("export AZMON_PV_COLLECT_KUBE_SYSTEM_METRICS=#{@collectPVKubeSystemMetrics}\n")
+  end
+rescue => errorStr
+  puts "Exception while opening file for writing metric collection config environment variables: #{errorStr}"
+end
+
+puts "****************End Metric Collection Settings Processing********************"
diff --git a/kubernetes/container-azm-ms-agentconfig.yaml b/kubernetes/container-azm-ms-agentconfig.yaml
index 58e09f041..aec1bb456 100644
--- a/kubernetes/container-azm-ms-agentconfig.yaml
+++ b/kubernetes/container-azm-ms-agentconfig.yaml
@@ -42,6 +42,7 @@ data:
           # When the setting is set to false, only the kube events with !normal event type will be collected
           enabled = false
           # When this is enabled (enabled = true), all kube events including normal events will be collected
+
   prometheus-data-collection-settings: |-
     # Custom Prometheus metrics data collection settings
     [prometheus_data_collection_settings.cluster]
@@ -90,6 +91,15 @@ data:
         #fieldpass = ["metric_to_pass1", "metric_to_pass12"]
 
         #fielddrop = ["metric_to_drop"]
+
+  metric_collection_settings: |-
+    # Metrics collection settings for metrics sent to Log Analytics and MDM
+    [metric_collection_settings.collect_kube_system_pv_metrics]
+      # In the absence of this configmap, default value for collect_kube_system_pv_metrics is false
+      # When the setting is set to false, only the persistent volume metrics outside the kube-system namespace will be collected
+      enabled = false
+      # When this is enabled (enabled = true), persistent volume metrics including those in the kube-system namespace will be collected
+
   alertable-metrics-configuration-settings: |-
     # Alertable metrics configuration settings for container resource utilization
     [alertable_metrics_configuration_settings.container_resource_utilization_thresholds]
@@ -100,6 +110,11 @@ data:
         container_memory_rss_threshold_percentage = 95.0
         # Threshold for container memoryWorkingSet, metric will be sent only when memory working set exceeds or becomes equal to the following percentage
         container_memory_working_set_threshold_percentage = 95.0
+
+    # Alertable metrics configuration settings for persistent volume utilization
+    [alertable_metrics_configuration_settings.pv_utilization_thresholds]
+        # Threshold for persistent volume usage bytes, metric will be sent only when persistent volume utilization exceeds or becomes equal to the following percentage
+        pv_usage_threshold_percentage = 60.0
   integrations: |-
     [integrations.azure_network_policy_manager]
         collect_basic_metrics = false
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index 311470660..d9fdc42e9 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -236,6 +236,14 @@ cat config_mdm_metrics_env_var | while read line; do
 done
 source config_mdm_metrics_env_var
 
+#Parse the configmap to set the right environment variables for metric collection settings
+/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-metric-collection-config.rb
+
+cat config_metric_collection_env_var | while read line; do
+    echo $line >> ~/.bashrc
+done
+source config_metric_collection_env_var
+
 #Setting environment variable for CAdvisor metrics to use port 10255/10250 based on curl request
 echo "Making wget request to cadvisor endpoint with port 10250"
 #Defaults to use port 10255
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index b71a95227..5cda4dcb3 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -125,7 +125,7 @@ data:
      <filter mdm.cadvisorperf**>
       type filter_cadvisor2mdm
       custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
-      metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
+      metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
       log_level info
      </filter>
 
diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
index 13796cd1e..7661bb7a1 100644
--- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
+++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
@@ -20,6 +20,7 @@ class CAdvisorMetricsAPIClient
   @clusterEnvVarCollectionEnabled = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"]
   @clusterStdErrLogCollectionEnabled = ENV["AZMON_COLLECT_STDERR_LOGS"]
   @clusterStdOutLogCollectionEnabled = ENV["AZMON_COLLECT_STDOUT_LOGS"]
+  @pvKubeSystemCollectionMetricsEnabled = ENV["AZMON_PV_COLLECT_KUBE_SYSTEM_METRICS"]
   @clusterLogTailExcludPath = ENV["AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH"]
   @clusterLogTailPath = ENV["AZMON_LOG_TAIL_PATH"]
   @clusterAgentSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
@@ -53,6 +54,7 @@ class CAdvisorMetricsAPIClient
   @@winNodePrevMetricRate = {}
   @@telemetryCpuMetricTimeTracker = DateTime.now.to_time.to_i
   @@telemetryMemoryMetricTimeTracker = DateTime.now.to_time.to_i
+  @@telemetryPVKubeSystemMetricsTimeTracker = DateTime.now.to_time.to_i
 
   #Containers a hash of node name and the last time telemetry was sent for this node
   @@nodeTelemetryTimeTracker = {}
@@ -301,6 +303,8 @@ def getInsightsMetrics(winNode: nil, metricTime: Time.now.utc.iso8601)
           metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryTotal", "containerGpumemoryTotalBytes", metricTime))
           metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryUsed","containerGpumemoryUsedBytes", metricTime))
           metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "dutyCycle","containerGpuDutyCycle", metricTime))
+
+          metricDataItems.concat(getPersistentVolumeMetrics(metricInfo, hostName, "usedBytes", Constants::PV_USED_BYTES, metricTime))
         else
           @Log.warn("Couldn't get Insights metrics information for host: #{hostName} os:#{operatingSystem}")
         end
@@ -311,6 +315,79 @@ def getInsightsMetrics(winNode: nil, metricTime: Time.now.utc.iso8601)
       return metricDataItems
     end
 
+    def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metricNameToReturn, metricPollTime)
+      # Builds one InsightsMetrics item for every pod volume that references a named PVC.
+      # Pods in the kube-system namespace are skipped while AZMON_PV_COLLECT_KUBE_SYSTEM_METRICS is "false".
+      #   metricJSON          - parsed stats payload; its "pods" array is walked
+      #   hostName            - stored in the "Computer" field of each item
+      #   metricNameToCollect - key read from each volume entry to fill "Value"
+      #   metricNameToReturn  - stored in the "Name" field of each item
+      #   metricPollTime      - stored in the "CollectionTime" field of each item
+      # Returns the array of items built (empty when nothing matched).
+      # If walking the payload raises, a warning is logged and the items built so far are returned.
+      secondsSinceLastTelemetry = (DateTime.now.to_time.to_i - @@telemetryPVKubeSystemMetricsTimeTracker).abs
+      minutesSinceLastTelemetry = secondsSinceLastTelemetry / 60
+
+      metricItems = []
+      # Cluster identity is looked up once and reused in every item's tags
+      clusterId = KubernetesApiClient.getClusterId
+      clusterName = KubernetesApiClient.getClusterName
+      begin
+        metricJSON["pods"].each do |pod|
+          # The namespace check runs for every pod, before its volumes are looked at
+          inKubeSystem = pod["podRef"]["namespace"].downcase == "kube-system"
+          next if inKubeSystem && @pvKubeSystemCollectionMetricsEnabled == "false"
+          next if pod["volume"].nil?
+
+          pod["volume"].each do |volume|
+            # Only volumes that carry a PVC reference with a name are reported
+            claim = volume["pvcRef"]
+            next if claim.nil? || claim["name"].nil?
+
+            # Tags identify the cluster, pod and claim, and carry the volume capacity
+            metricTags = {
+              Constants::INSIGHTSMETRICS_TAGS_CLUSTERID => clusterId,
+              Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME => clusterName,
+              Constants::INSIGHTSMETRICS_TAGS_POD_UID => pod["podRef"]["uid"],
+              Constants::INSIGHTSMETRICS_TAGS_POD_NAME => pod["podRef"]["name"],
+              Constants::INSIGHTSMETRICS_TAGS_PVC_NAME => claim["name"],
+              Constants::INSIGHTSMETRICS_TAGS_PVC_NAMESPACE => claim["namespace"],
+              Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES => volume["capacityBytes"],
+            }
+
+            metricItems.push({
+              "CollectionTime" => metricPollTime,
+              "Computer" => hostName,
+              "Name" => metricNameToReturn,
+              "Value" => volume[metricNameToCollect],
+              "Origin" => Constants::INSIGHTSMETRICS_TAGS_ORIGIN,
+              "Namespace" => Constants::INSIGTHTSMETRICS_TAGS_PV_NAMESPACE,
+              "Tags" => metricTags,
+            })
+          end
+        end
+      rescue => errorStr
+        @Log.warn("getPersistentVolumeMetrics failed: #{errorStr} for metric #{metricNameToCollect}")
+        return metricItems
+      end
+
+      # If kube-system metrics collection enabled, send telemetry
+      # (at most once per TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
+      begin
+        telemetryDue = minutesSinceLastTelemetry >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES
+        if telemetryDue && @pvKubeSystemCollectionMetricsEnabled == "true"
+          ApplicationInsightsUtility.sendCustomEvent(Constants::PV_KUBE_SYSTEM_METRICS_ENABLED_EVENT, {})
+          @@telemetryPVKubeSystemMetricsTimeTracker = DateTime.now.to_time.to_i
+        end
+      rescue => errorStr
+        @Log.warn("getPersistentVolumeMetrics kube-system metrics enabled telemetry failed: #{errorStr}")
+      end
+
+      return metricItems
+    end
+
+
     def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCollect, metricNametoReturn, metricPollTime)
       metricItems = []
       clusterId = KubernetesApiClient.getClusterId
diff --git a/source/plugins/ruby/MdmAlertTemplates.rb b/source/plugins/ruby/MdmAlertTemplates.rb
index 2e516a99d..d5107fea1 100644
--- a/source/plugins/ruby/MdmAlertTemplates.rb
+++ b/source/plugins/ruby/MdmAlertTemplates.rb
@@ -90,6 +90,38 @@ class MdmAlertTemplates
         }
     }'
 
+  PV_resource_utilization_template = '
+    {
+        "time": "%{timestamp}",
+        "data": {
+            "baseData": {
+                "metric": "%{metricName}",
+                "namespace": "insights.container/persistentvolumes",
+                "dimNames": [
+                    "podName",
+                    "node",
+                    "kubernetesNamespace",
+                    "thresholdPercentage"
+                ],
+                "series": [
+                {
+                    "dimValues": [
+                        "%{podNameDimValue}",
+                        "%{computerNameDimValue}",
+                        "%{namespaceDimValue}",
+                        "%{thresholdPercentageDimValue}"
+                    ],
+                    "min": %{pvResourceUtilizationPercentage},
+                    "max": %{pvResourceUtilizationPercentage},
+                    "sum": %{pvResourceUtilizationPercentage},
+                    "count": 1
+                }
+                ]
+            }
+        }
+    }'
+
+
   Node_resource_metrics_template = '
             {
                 "time": "%{timestamp}",
diff --git a/source/plugins/ruby/MdmMetricsGenerator.rb b/source/plugins/ruby/MdmMetricsGenerator.rb
index 3d75dc6f4..1e7db37cc 100644
--- a/source/plugins/ruby/MdmMetricsGenerator.rb
+++ b/source/plugins/ruby/MdmMetricsGenerator.rb
@@ -37,6 +37,10 @@ class MdmMetricsGenerator
     Constants::MEMORY_WORKING_SET_BYTES => Constants::MDM_CONTAINER_MEMORY_WORKING_SET_UTILIZATION_METRIC,
   }
 
+  @@pod_metric_name_metric_percentage_name_hash = {
+    Constants::PV_USED_BYTES => Constants::MDM_PV_UTILIZATION_METRIC
+  }
+
   # Setting this to true since we need to send zero filled metrics at startup. If metrics are absent alert creation fails
   @sendZeroFilledMetrics = true
 
@@ -259,6 +263,31 @@ def getContainerResourceUtilMetricRecords(recordTimeStamp, metricName, percentag
       return records
     end
 
+    def getPVResourceUtilMetricRecords(recordTimeStamp, metricName, computer, percentageMetricValue, dims, thresholdPercentage)
+      # Fills PV_resource_utilization_template for one PV usage sample; dims is the sample's tag hash.
+      # Returns an array with the parsed record, or an empty array when building it fails.
+      records = []
+      begin
+        pvcNamespace = dims[Constants::INSIGHTSMETRICS_TAGS_PVC_NAMESPACE]
+        podName = dims[Constants::INSIGHTSMETRICS_TAGS_POD_NAME]
+
+        resourceUtilRecord = MdmAlertTemplates::PV_resource_utilization_template % {
+          timestamp: recordTimeStamp,
+          metricName: @@pod_metric_name_metric_percentage_name_hash[metricName],
+          podNameDimValue: podName,
+          computerNameDimValue: computer,
+          namespaceDimValue: pvcNamespace,
+          pvResourceUtilizationPercentage: percentageMetricValue,
+          thresholdPercentageDimValue: thresholdPercentage,
+        }
+        records.push(Yajl::Parser.parse(StringIO.new(resourceUtilRecord)))
+      rescue => errorStr
+        @log.info "Error in getPVResourceUtilMetricRecords: #{errorStr}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+      return records
+    end
+
     def getDiskUsageMetricRecords(record)
       records = []
       usedPercent = nil
@@ -356,6 +385,7 @@ def getContainerResourceUtilizationThresholds
         metric_threshold_hash[Constants::CPU_USAGE_NANO_CORES] = Constants::DEFAULT_MDM_CPU_UTILIZATION_THRESHOLD
         metric_threshold_hash[Constants::MEMORY_RSS_BYTES] = Constants::DEFAULT_MDM_MEMORY_RSS_THRESHOLD
         metric_threshold_hash[Constants::MEMORY_WORKING_SET_BYTES] = Constants::DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD
+        metric_threshold_hash[Constants::PV_USED_BYTES] = Constants::DEFAULT_MDM_PV_UTILIZATION_THRESHOLD
 
         cpuThreshold = ENV["AZMON_ALERT_CONTAINER_CPU_THRESHOLD"]
         if !cpuThreshold.nil? && !cpuThreshold.empty?
@@ -375,6 +405,12 @@ def getContainerResourceUtilizationThresholds
           memoryWorkingSetThresholdFloat = (memoryWorkingSetThreshold.to_f).round(2)
           metric_threshold_hash[Constants::MEMORY_WORKING_SET_BYTES] = memoryWorkingSetThresholdFloat
         end
+
+        pvUsagePercentageThreshold = ENV["AZMON_ALERT_PV_USAGE_THRESHOLD"]
+        if !pvUsagePercentageThreshold.nil? && !pvUsagePercentageThreshold.empty?
+          pvUsagePercentageThresholdFloat = (pvUsagePercentageThreshold.to_f).round(2)
+          metric_threshold_hash[Constants::PV_USED_BYTES] = pvUsagePercentageThresholdFloat
+        end
       rescue => errorStr
         @log.info "Error in getContainerResourceUtilizationThresholds: #{errorStr}"
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index dd1ba24b3..82a6e8814 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -13,6 +13,12 @@ class Constants
     INSIGHTSMETRICS_TAGS_K8SNAMESPACE = "k8sNamespace"
     INSIGHTSMETRICS_TAGS_CONTROLLER_NAME = "controllerName"
     INSIGHTSMETRICS_TAGS_CONTROLLER_KIND = "controllerKind"
+    INSIGHTSMETRICS_TAGS_POD_UID = "podUid"
+    INSIGTHTSMETRICS_TAGS_PV_NAMESPACE = "container.azm.ms/pv"
+    INSIGHTSMETRICS_TAGS_PVC_NAME = "pvcName"
+    INSIGHTSMETRICS_TAGS_PVC_NAMESPACE = "pvcNamespace"
+    INSIGHTSMETRICS_TAGS_POD_NAME = "podName"
+    INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES = "pvCapacityBytes"
     INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics"
     REASON_OOM_KILLED = "oomkilled"
     #Kubestate (common)
@@ -45,6 +51,7 @@ class Constants
     MDM_CONTAINER_CPU_UTILIZATION_METRIC = "cpuExceededPercentage"
     MDM_CONTAINER_MEMORY_RSS_UTILIZATION_METRIC = "memoryRssExceededPercentage"
     MDM_CONTAINER_MEMORY_WORKING_SET_UTILIZATION_METRIC = "memoryWorkingSetExceededPercentage"
+    MDM_PV_UTILIZATION_METRIC = "pvUsageExceededPercentage"
     MDM_NODE_CPU_USAGE_PERCENTAGE = "cpuUsagePercentage"
     MDM_NODE_MEMORY_RSS_PERCENTAGE = "memoryRssPercentage"
     MDM_NODE_MEMORY_WORKING_SET_PERCENTAGE = "memoryWorkingSetPercentage"
@@ -56,9 +63,11 @@ class Constants
     CPU_USAGE_MILLI_CORES = "cpuUsageMillicores"
     MEMORY_WORKING_SET_BYTES= "memoryWorkingSetBytes"
     MEMORY_RSS_BYTES = "memoryRssBytes"
+    PV_USED_BYTES = "pvUsedBytes"
     DEFAULT_MDM_CPU_UTILIZATION_THRESHOLD = 95.0
     DEFAULT_MDM_MEMORY_RSS_THRESHOLD = 95.0
     DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD = 95.0
+    DEFAULT_MDM_PV_UTILIZATION_THRESHOLD = 60.0
     CONTROLLER_KIND_JOB = "job"
     CONTAINER_TERMINATION_REASON_COMPLETED = "completed"
     CONTAINER_STATE_TERMINATED = "terminated"
@@ -71,6 +80,8 @@ class Constants
     CONTAINER_METRICS_HEART_BEAT_EVENT = "ContainerMetricsMdmHeartBeatEvent"
     POD_READY_PERCENTAGE_HEART_BEAT_EVENT = "PodReadyPercentageMdmHeartBeatEvent"
     CONTAINER_RESOURCE_UTIL_HEART_BEAT_EVENT = "ContainerResourceUtilMdmHeartBeatEvent"
+    PV_USAGE_HEART_BEAT_EVENT = "PVUsageMdmHeartBeatEvent"
+    PV_KUBE_SYSTEM_METRICS_ENABLED_EVENT = "CollectPVKubeSystemMetricsEnabled"
     TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 10
     KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
     MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
diff --git a/source/plugins/ruby/filter_cadvisor2mdm.rb b/source/plugins/ruby/filter_cadvisor2mdm.rb
index fd43ef98b..3bc674ea8 100644
--- a/source/plugins/ruby/filter_cadvisor2mdm.rb
+++ b/source/plugins/ruby/filter_cadvisor2mdm.rb
@@ -16,7 +16,7 @@ class CAdvisor2MdmFilter < Filter
     config_param :enable_log, :integer, :default => 0
     config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log"
     config_param :custom_metrics_azure_regions, :string
-    config_param :metrics_to_collect, :string, :default => "Constants::CPU_USAGE_NANO_CORES,Constants::MEMORY_WORKING_SET_BYTES,Constants::MEMORY_RSS_BYTES"
+    config_param :metrics_to_collect, :string, :default => "Constants::CPU_USAGE_NANO_CORES,Constants::MEMORY_WORKING_SET_BYTES,Constants::MEMORY_RSS_BYTES,Constants::PV_USED_BYTES"
 
     @@hostName = (OMS::Common.get_hostname)
 
@@ -46,11 +46,13 @@ def start
         @metrics_to_collect_hash = build_metrics_hash
         @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}"
         @@containerResourceUtilTelemetryTimeTracker = DateTime.now.to_time.to_i
+        @@pvUsageTelemetryTimeTracker = DateTime.now.to_time.to_i
 
         # These variables keep track if any resource utilization threshold exceeded in the last 10 minutes
         @containersExceededCpuThreshold = false
         @containersExceededMemRssThreshold = false
         @containersExceededMemWorkingSetThreshold = false
+        @pvExceededUsageThreshold = false
 
         # initialize cpu and memory limit
         if @process_incoming_stream
@@ -60,6 +62,7 @@ def start
           @containerCpuLimitHash = {}
           @containerMemoryLimitHash = {}
           @containerResourceDimensionHash = {}
+          @pvUsageHash = {}
           @@metric_threshold_hash = MdmMetricsGenerator.getContainerResourceUtilizationThresholds
         end
       rescue => e
@@ -87,6 +90,8 @@ def setThresholdExceededTelemetry(metricName)
           @containersExceededMemRssThreshold = true
         elsif metricName == Constants::MEMORY_WORKING_SET_BYTES
           @containersExceededMemWorkingSetThreshold = true
+        elsif metricName == Constants::PV_USED_BYTES
+          @pvExceededUsageThreshold = true
         end
       rescue => errorStr
         @log.info "Error in setThresholdExceededTelemetry: #{errorStr}"
@@ -109,13 +114,30 @@ def flushMetricTelemetry
           properties["MemRssThresholdExceededInLastFlushInterval"] = @containersExceededMemRssThreshold
           properties["MemWSetThresholdExceededInLastFlushInterval"] = @containersExceededMemWorkingSetThreshold
           ApplicationInsightsUtility.sendCustomEvent(Constants::CONTAINER_RESOURCE_UTIL_HEART_BEAT_EVENT, properties)
-          @@containerResourceUtilTelemetryTimeTracker = DateTime.now.to_time.to_i
           @containersExceededCpuThreshold = false
           @containersExceededMemRssThreshold = false
           @containersExceededMemWorkingSetThreshold = false
+          @@containerResourceUtilTelemetryTimeTracker = DateTime.now.to_time.to_i
+        end
+      rescue => errorStr
+        @log.info "Error in flushMetricTelemetry: #{errorStr} for container resource util telemetry"
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+
+      # Also send for PV usage metrics
+      begin
+        pvTimeDifference = (DateTime.now.to_time.to_i - @@pvUsageTelemetryTimeTracker).abs
+        pvTimeDifferenceInMinutes = pvTimeDifference / 60
+        if (pvTimeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
+          pvProperties = {}
+          pvProperties["PVUsageThresholdPercentage"] = @@metric_threshold_hash[Constants::PV_USED_BYTES]
+          pvProperties["PVUsageThresholdExceededInLastFlushInterval"] = @pvExceededUsageThreshold
+          ApplicationInsightsUtility.sendCustomEvent(Constants::PV_USAGE_HEART_BEAT_EVENT, pvProperties)
+          @pvExceededUsageThreshold = false
+          @@pvUsageTelemetryTimeTracker = DateTime.now.to_time.to_i
         end
       rescue => errorStr
-        @log.info "Error in flushMetricTelemetry: #{errorStr}"
+        @log.info "Error in flushMetricTelemetry: #{errorStr} for PV usage telemetry"
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
     end
@@ -123,6 +145,13 @@ def flushMetricTelemetry
     def filter(tag, time, record)
       begin
         if @process_incoming_stream
+
+          # Check if insights metrics for PV metrics
+          data_type = record["DataType"]
+          if data_type == "INSIGHTS_METRICS_BLOB"
+            return filterPVInsightsMetrics(record)
+          end
+
           object_name = record["DataItems"][0]["ObjectName"]
           counter_name = record["DataItems"][0]["Collections"][0]["CounterName"]
           percentage_metric_value = 0.0
@@ -204,6 +233,47 @@ def filter(tag, time, record)
       end
     end
 
+    def filterPVInsightsMetrics(record)
+      # Turns the pvUsedBytes items of an insights metrics record into MDM records.
+      # Returns the MDM records of every item whose usage percentage is at or above the configured
+      # threshold; returns [] when no item qualifies or when an error is raised.
+      begin
+        mdmMetrics = []
+        record["DataItems"].each do |dataItem|
+          if dataItem["Name"] == Constants::PV_USED_BYTES && @metrics_to_collect_hash.key?(dataItem["Name"].downcase)
+            metricName = dataItem["Name"]
+            usage = dataItem["Value"]
+            capacity = dataItem["Tags"][Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES]
+            flushMetricTelemetry
+            # No percentage can be computed without a usage value and a non-zero capacity
+            next if usage.nil? || capacity.nil? || capacity == 0
+
+            percentage_metric_value = (usage * 100.0) / capacity
+            @log.info "percentage_metric_value for metric: #{metricName} percentage: #{percentage_metric_value}"
+            @log.info "@@metric_threshold_hash for #{metricName}: #{@@metric_threshold_hash[metricName]}"
+
+            computer = dataItem["Computer"]
+            resourceDimensions = dataItem["Tags"]
+            thresholdPercentage = @@metric_threshold_hash[metricName]
+            if percentage_metric_value >= thresholdPercentage
+              setThresholdExceededTelemetry(metricName)
+              mdmMetrics.concat(MdmMetricsGenerator.getPVResourceUtilMetricRecords(dataItem["CollectionTime"],
+                                                                                   metricName,
+                                                                                   computer,
+                                                                                   percentage_metric_value,
+                                                                                   resourceDimensions,
+                                                                                   thresholdPercentage))
+            end # end if block for percentage metric > configured threshold % check
+          end # end if block for dataItem name check
+        end # end for block of looping through data items
+        return mdmMetrics
+      rescue Exception => e
+        @log.info "Error processing cadvisor insights metrics record Exception: #{e.class} Message: #{e.message}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(e.backtrace)
+        return [] #return empty array if we ran into any errors
+      end
+    end
+
     def ensure_cpu_memory_capacity_set
       if @cpu_capacity != 0.0 && @memory_capacity != 0.0
         @log.info "CPU And Memory Capacity are already set"
diff --git a/source/plugins/ruby/in_cadvisor_perf.rb b/source/plugins/ruby/in_cadvisor_perf.rb
index a44365e9d..b706ff00a 100644
--- a/source/plugins/ruby/in_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_cadvisor_perf.rb
@@ -88,6 +88,7 @@ def enumerate()
           end
 
           router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+          router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
           
           if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
             $log.info("cAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
diff --git a/source/plugins/ruby/in_win_cadvisor_perf.rb b/source/plugins/ruby/in_win_cadvisor_perf.rb
index 38868f2f5..4e90195e5 100644
--- a/source/plugins/ruby/in_win_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_win_cadvisor_perf.rb
@@ -101,6 +101,7 @@ def enumerate()
             end
 
             router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+            router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
             if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
               $log.info("winCAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
             end 

From 7304a6b32652a870087ac39f49b640bca85da1c1 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Wed, 23 Sep 2020 13:00:03 -0700
Subject: [PATCH 24/60] add new custom metric regions (#444)

* add new custom metric regions

* fix commas
---
 build/linux/installer/conf/container.conf            |  4 ++--
 build/linux/installer/conf/kube.conf                 |  6 +++---
 .../templates/omsagent-rs-configmap.yaml             |  6 +++---
 kubernetes/omsagent.yaml                             |  6 +++---
 scripts/troubleshoot/TroubleshootError.ps1           | 12 +++++++++++-
 5 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf
index e55c62fbc..f7e6e1da9 100644
--- a/build/linux/installer/conf/container.conf
+++ b/build/linux/installer/conf/container.conf
@@ -45,14 +45,14 @@
 #custom_metrics_mdm filter plugin
 <filter mdm.cadvisorperf**>
   type filter_cadvisor2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
   metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
   log_level info
 </filter>
 
 <filter oms.mdm.container.perf.telegraf**>
   type filter_telegraf2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
   log_level debug
 </filter>
 
diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf
index ba40b7a35..dbb4db0da 100644
--- a/build/linux/installer/conf/kube.conf
+++ b/build/linux/installer/conf/kube.conf
@@ -13,7 +13,7 @@
      tag oms.containerinsights.KubePodInventory
      run_interval 60
      log_level debug
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
     </source>
 
     #Kubernetes events
@@ -66,14 +66,14 @@
 
     <filter mdm.kubenodeinventory**>
      type filter_inventory2mdm
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      log_level info
     </filter>
 
     #custom_metrics_mdm filter plugin for perf data from windows nodes
     <filter mdm.cadvisorperf**>
      type filter_cadvisor2mdm
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
      log_level info
     </filter>
diff --git a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
index ee0664495..475b17a46 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
@@ -18,7 +18,7 @@ data:
       tag oms.containerinsights.KubePodInventory
       run_interval 60
       log_level debug
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      </source>
 
      #Kubernetes events
@@ -70,14 +70,14 @@ data:
      </source>
      <filter mdm.kubenodeinventory**>
       type filter_inventory2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       log_level info
      </filter>
 
      # custom_metrics_mdm filter plugin for perf data from windows nodes
      <filter mdm.cadvisorperf**>
       type filter_cadvisor2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
       log_level info
      </filter>
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 5cda4dcb3..9c8f9de14 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -64,7 +64,7 @@ data:
       tag oms.containerinsights.KubePodInventory
       run_interval 60
       log_level debug
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      </source>
 
      #Kubernetes events
@@ -117,14 +117,14 @@ data:
 
      <filter mdm.kubenodeinventory**>
       type filter_inventory2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       log_level info
      </filter>
 
      #custom_metrics_mdm filter plugin for perf data from windows nodes
      <filter mdm.cadvisorperf**>
       type filter_cadvisor2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast
+      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
       log_level info
      </filter>
diff --git a/scripts/troubleshoot/TroubleshootError.ps1 b/scripts/troubleshoot/TroubleshootError.ps1
index 754a43e74..4c2d95ac6 100644
--- a/scripts/troubleshoot/TroubleshootError.ps1
+++ b/scripts/troubleshoot/TroubleshootError.ps1
@@ -234,7 +234,17 @@ $MdmCustomMetricAvailabilityLocations = (
     'eastasia',
     'centralindia',
     'uksouth',
-    'canadacentral'
+    'canadacentral',
+    'francecentral',
+    'japaneast',
+    'australiaeast',
+    'eastus2',
+    'westus',
+    'australiasoutheast',
+    'brazilsouth',
+    'germanywestcentral',
+    'northcentralus',
+    'switzerlandnorth'
 );
 
 try {

From 2d8c03fec9edc15da7df5a14b9b5d561b4e85add Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Wed, 23 Sep 2020 13:01:07 -0700
Subject: [PATCH 25/60] add 'Terminating' state (#443)

---
 source/plugins/ruby/constants.rb            | 3 +++
 source/plugins/ruby/in_kube_podinventory.rb | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 82a6e8814..a64a4c97c 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -85,4 +85,7 @@ class Constants
     TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 10
     KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
     MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
+
+    #Pod Statuses
+    POD_STATUS_TERMINATING = "Terminating"
 end
\ No newline at end of file
diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb
index bffa725ee..4880d80e7 100644
--- a/source/plugins/ruby/in_kube_podinventory.rb
+++ b/source/plugins/ruby/in_kube_podinventory.rb
@@ -194,6 +194,9 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi
 
           if podReadyCondition == false
             record["PodStatus"] = "Unknown"
+          # ICM - https://portal.microsofticm.com/imp/v3/incidents/details/187091803/home
+          elsif !items["metadata"]["deletionTimestamp"].nil? && !items["metadata"]["deletionTimestamp"].empty?
+            record["PodStatus"] = Constants::POD_STATUS_TERMINATING
           else
             record["PodStatus"] = items["status"]["phase"]
           end

From da06d760ccb324e034a84187a3766c89d6bffb02 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Fri, 25 Sep 2020 12:36:27 -0700
Subject: [PATCH 26/60] Gangams/sept agent release tasks (#445)

* turn off MDM metrics for unsupported cluster types

* enable server certificate validation for the Application Insights Ruby HTTP client

* add kubelet operations total and total error metrics

* node selector label change

* label update

* wip

* wip

* wip

* revert quotes
---
 build/linux/installer/conf/telegraf.conf      |  9 +++----
 .../templates/omsagent-daemonset-windows.yaml |  5 ++++
 charts/azuremonitor-containers/values.yaml    | 26 +++++++++++++++++++
 kubernetes/linux/main.sh                      |  9 +++----
 kubernetes/omsagent.yaml                      |  3 ++-
 .../channel/sender_base.rb                    |  4 +--
 source/plugins/ruby/out_mdm.rb                |  9 +++++--
 7 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/build/linux/installer/conf/telegraf.conf b/build/linux/installer/conf/telegraf.conf
index 013aa1af2..202ac9741 100644
--- a/build/linux/installer/conf/telegraf.conf
+++ b/build/linux/installer/conf/telegraf.conf
@@ -632,8 +632,7 @@
   name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
   urls = ["$CADVISOR_METRICS_URL"]
-  ## Include "$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC" when we add for support for 1.18
-  fieldpass = ["$KUBELET_RUNTIME_OPERATIONS_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC"]
+  fieldpass = ["$KUBELET_RUNTIME_OPERATIONS_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_METRIC", "$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC", "$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC"]
 
   metric_version = 2
   url_tag = "scrapeUrl"
@@ -675,7 +674,7 @@
   name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
   urls = ["$CADVISOR_METRICS_URL"]
-  
+
   fieldpass = ["kubelet_running_pod_count","volume_manager_total_volumes", "kubelet_node_config_error", "process_resident_memory_bytes", "process_cpu_seconds_total"]
 
   metric_version = 2
@@ -690,7 +689,7 @@
   ## Optional TLS Config
   tls_ca = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
   insecure_skip_verify = true
-  
+
 
 ## prometheus custom metrics
 [[inputs.prometheus]]
@@ -731,7 +730,7 @@
   #name_prefix="container.azm.ms/"
   ## An array of urls to scrape metrics from.
   urls = $AZMON_INTEGRATION_NPM_METRICS_URL_LIST_NODE
-  
+
   metric_version = 2
   url_tag = "scrapeUrl"
 
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index 7acd46c37..72b09f6c1 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -25,8 +25,13 @@ spec:
     dockerProviderVersion: {{ .Values.omsagent.image.dockerProviderVersion }}
     schema-versions: "v1"
   spec:
+{{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
+   nodeSelector:
+      kubernetes.io/os: windows
+{{- else }}
    nodeSelector:
       beta.kubernetes.io/os: windows
+{{- end }}
    {{- if .Values.omsagent.rbac }}
    serviceAccountName: omsagent
    {{- end }}
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 9c48cf9fb..1804d1197 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -56,6 +56,17 @@ omsagent:
     affinity:
       nodeAffinity:
         requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+            - labelSelector:
+              matchExpressions:
+                - key: kubernetes.io/os
+                  operator: In
+                  values:
+                    - linux
+                - key: type
+                  operator: NotIn
+                  values:
+                    - virtual-kubelet
           nodeSelectorTerms:
             - labelSelector:
               matchExpressions:
@@ -71,6 +82,21 @@ omsagent:
     affinity:
       nodeAffinity:
         requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+            - labelSelector:
+              matchExpressions:
+                - key: kubernetes.io/os
+                  operator: In
+                  values:
+                    - linux
+                - key: type
+                  operator: NotIn
+                  values:
+                    - virtual-kubelet
+                - key: kubernetes.io/role
+                  operator: NotIn
+                  values:
+                    - master
           nodeSelectorTerms:
             - labelSelector:
               matchExpressions:
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index d9fdc42e9..11972f0f4 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -300,11 +300,10 @@ fi
 echo "configured container runtime on kubelet is : "$CONTAINER_RUNTIME
 echo "export CONTAINER_RUNTIME="$CONTAINER_RUNTIME >> ~/.bashrc
 
-# enable these metrics in next agent release
-# export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="kubelet_runtime_operations_total"
-# echo "export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC >> ~/.bashrc
-# export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="kubelet_runtime_operations_errors_total"
-# echo "export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC >> ~/.bashrc
+export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="kubelet_runtime_operations_total"
+echo "export KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_TOTAL_METRIC >> ~/.bashrc
+export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="kubelet_runtime_operations_errors_total"
+echo "export KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC="$KUBELET_RUNTIME_OPERATIONS_ERRORS_TOTAL_METRIC >> ~/.bashrc
 
 # default to docker metrics
 export KUBELET_RUNTIME_OPERATIONS_METRIC="kubelet_docker_operations"
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 9c8f9de14..09e50b5a4 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -419,7 +419,8 @@ spec:
             nodeSelectorTerms:
               - labelSelector:
                 matchExpressions:
-                  - key: beta.kubernetes.io/os
+      # kubernetes.io/os label doesnt exist in k8s versions < 1.14  so make sure to choose label based on k8s version in aks yaml
+                  - key: kubernetes.io/os
                     operator: In
                     values:
                     - linux
diff --git a/source/plugins/ruby/lib/application_insights/channel/sender_base.rb b/source/plugins/ruby/lib/application_insights/channel/sender_base.rb
index 33ac49286..bedbae4ee 100644
--- a/source/plugins/ruby/lib/application_insights/channel/sender_base.rb
+++ b/source/plugins/ruby/lib/application_insights/channel/sender_base.rb
@@ -66,12 +66,12 @@ def send(data_to_send)
         request.body = compressed_data
         if @proxy.nil? || @proxy.empty?
           http = Net::HTTP.new uri.hostname, uri.port
-        else 
+        else
           http = Net::HTTP.new(uri.hostname, uri.port, @proxy[:addr], @proxy[:port], @proxy[:user], @proxy[:pass])
         end
         if uri.scheme.downcase == 'https'
           http.use_ssl = true
-          http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+          http.verify_mode = OpenSSL::SSL::VERIFY_PEER
         end
 
         response = http.request(request)
diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index c4cc46dd7..1c805255a 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -61,15 +61,17 @@ def configure(conf)
     def start
       super
       begin
-        file = File.read(@@azure_json_path)
-        @data_hash = JSON.parse(file)
         aks_resource_id = ENV["AKS_RESOURCE_ID"]
         aks_region = ENV["AKS_REGION"]
 
         if aks_resource_id.to_s.empty?
           @log.info "Environment Variable AKS_RESOURCE_ID is not set.. "
           @can_send_data_to_mdm = false
+        elsif !aks_resource_id.downcase.include?("/microsoft.containerservice/managedclusters/") && !aks_resource_id.downcase.include?("/microsoft.kubernetes/connectedclusters/")
+          @log.info "MDM Metris not supported for this cluster type resource: #{aks_resource_id}"
+          @can_send_data_to_mdm = false
         end
+
         if aks_region.to_s.empty?
           @log.info "Environment Variable AKS_REGION is not set.. "
           @can_send_data_to_mdm = false
@@ -106,6 +108,9 @@ def start
             @cluster_identity = ArcK8sClusterIdentity.new
             @cached_access_token = @cluster_identity.get_cluster_identity_token
           else
+            # azure json file only used for aks and doesnt exist in non-azure envs
+            file = File.read(@@azure_json_path)
+            @data_hash = JSON.parse(file)
             # Check to see if SP exists, if it does use SP. Else, use msi
             sp_client_id = @data_hash["aadClientId"]
             sp_client_secret = @data_hash["aadClientSecret"]

From 545305438d54d44c5d3b02cd075019eb57617a48 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Mon, 28 Sep 2020 11:36:38 -0700
Subject: [PATCH 27/60] grwehner/pv-collect-volume-name (#448)

Collect and send the volume name as another tag for pvUsedBytes in InsightsMetrics, so that it can be displayed in the workload workbook. This does not affect the PV MDM metric.
---
 source/plugins/ruby/CAdvisorMetricsAPIClient.rb | 1 +
 source/plugins/ruby/constants.rb                | 1 +
 2 files changed, 2 insertions(+)

diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
index 7661bb7a1..9e0935480 100644
--- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
+++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
@@ -359,6 +359,7 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric
                   metricTags[Constants::INSIGHTSMETRICS_TAGS_POD_NAME] = podName
                   metricTags[Constants::INSIGHTSMETRICS_TAGS_PVC_NAME] = pvcName
                   metricTags[Constants::INSIGHTSMETRICS_TAGS_PVC_NAMESPACE] = pvcNamespace
+                  metricTags[Constants::INSIGHTSMETRICS_TAGS_VOLUME_NAME] = volume["name"]
                   metricTags[Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES] = volume["capacityBytes"]
 
                   metricItem["Tags"] = metricTags
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index a64a4c97c..73e3af471 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -19,6 +19,7 @@ class Constants
     INSIGHTSMETRICS_TAGS_PVC_NAMESPACE = "pvcNamespace"
     INSIGHTSMETRICS_TAGS_POD_NAME = "podName"
     INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES = "pvCapacityBytes"
+    INSIGHTSMETRICS_TAGS_VOLUME_NAME = "volumeName"
     INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics"
     REASON_OOM_KILLED = "oomkilled"
     #Kubestate (common)

From fe9f14df60f8d9a0cc52d33ad13c8c05b0c76cbb Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Tue, 29 Sep 2020 17:34:30 -0700
Subject: [PATCH 28/60] Changes for september agent release (#449)

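Move the health CRD from apiextensions.k8s.io/v1beta1 to v1 (for Kubernetes >= 1.19; v1beta1 is kept for older versions)
Add a timer so that zero-filled metrics are re-sent periodically, not only at startup
Add zero filling for PV metrics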
---
 .../templates/omsagent-crd.yaml               | 24 ++++++
 kubernetes/omsagent.yaml                      | 14 +++-
 source/plugins/ruby/MdmMetricsGenerator.rb    | 77 ++++++++++++-------
 source/plugins/ruby/constants.rb              | 63 +++++++--------
 4 files changed, 116 insertions(+), 62 deletions(-)

diff --git a/charts/azuremonitor-containers/templates/omsagent-crd.yaml b/charts/azuremonitor-containers/templates/omsagent-crd.yaml
index f4a028bd3..bbaf89a52 100644
--- a/charts/azuremonitor-containers/templates/omsagent-crd.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-crd.yaml
@@ -1,3 +1,4 @@
+{{- if semverCompare "<1.19-0" .Capabilities.KubeVersion.GitVersion }}
 apiVersion: apiextensions.k8s.io/v1beta1
 kind: CustomResourceDefinition
 metadata:
@@ -10,3 +11,26 @@ spec:
   names:
     plural: healthstates
     kind: HealthState
+{{- else }}
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  name: healthstates.azmon.container.insights
+  namespace: kube-system
+spec:
+  group: azmon.container.insights
+  versions:
+  - name: v1
+    served: true
+    storage: true
+    schema:
+      openAPIV3Schema:
+        type: object
+        properties:
+          state:
+            type: string
+  scope: Namespaced
+  names:
+    plural: healthstates
+    kind: HealthState
+{{- end }}
\ No newline at end of file
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 09e50b5a4..e8352e020 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -746,14 +746,24 @@ spec:
       port: 25227
       targetPort: in-rs-tcp
 ---
-apiVersion: apiextensions.k8s.io/v1beta1
+# this is for versions >=1.19, for versions <1.19 we continue to use v1beta1
+apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   name: healthstates.azmon.container.insights
   namespace: kube-system
 spec:
   group: azmon.container.insights
-  version: v1
+  versions:
+  - name: v1
+    served: true
+    storage: true
+    schema:
+      openAPIV3Schema:
+        type: object
+        properties:
+          state:
+            type: string
   scope: Namespaced
   names:
     plural: healthstates
diff --git a/source/plugins/ruby/MdmMetricsGenerator.rb b/source/plugins/ruby/MdmMetricsGenerator.rb
index 1e7db37cc..b8104212d 100644
--- a/source/plugins/ruby/MdmMetricsGenerator.rb
+++ b/source/plugins/ruby/MdmMetricsGenerator.rb
@@ -8,9 +8,11 @@ class MdmMetricsGenerator
   require_relative "MdmAlertTemplates"
   require_relative "ApplicationInsightsUtility"
   require_relative "constants"
+  require_relative "oms_common"
 
   @log_path = "/var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log"
   @log = Logger.new(@log_path, 1, 5000000)
+  @@hostName = (OMS::Common.get_hostname)
 
   @oom_killed_container_count_hash = {}
   @container_restart_count_hash = {}
@@ -38,11 +40,12 @@ class MdmMetricsGenerator
   }
 
   @@pod_metric_name_metric_percentage_name_hash = {
-    Constants::PV_USED_BYTES => Constants::MDM_PV_UTILIZATION_METRIC
+    Constants::PV_USED_BYTES => Constants::MDM_PV_UTILIZATION_METRIC,
   }
 
   # Setting this to true since we need to send zero filled metrics at startup. If metrics are absent alert creation fails
   @sendZeroFilledMetrics = true
+  @zeroFilledMetricsTimeTracker = DateTime.now.to_time.to_i
 
   def initialize
   end
@@ -179,6 +182,19 @@ def zeroFillMetricRecords(records, batch_time)
         if !containerMemoryWorkingSetRecord.nil? && !containerMemoryWorkingSetRecord.empty? && !containerMemoryWorkingSetRecord[0].nil? && !containerMemoryWorkingSetRecord[0].empty?
           records.push(containerMemoryWorkingSetRecord[0])
         end
+
+        pvZeroFillDims = {}
+        pvZeroFillDims[Constants::INSIGHTSMETRICS_TAGS_PVC_NAMESPACE] = Constants::KUBESYSTEM_NAMESPACE_ZERO_FILL
+        pvZeroFillDims[Constants::INSIGHTSMETRICS_TAGS_POD_NAME] = Constants::OMSAGENT_ZERO_FILL
+        pvResourceUtilMetricRecord = getPVResourceUtilMetricRecords(batch_time,
+                                                                    Constants::PV_USED_BYTES,
+                                                                    @@hostName,
+                                                                    0,
+                                                                    pvZeroFillDims,
+                                                                    metric_threshold_hash[Constants::PV_USED_BYTES])
+        if !pvResourceUtilMetricRecord.nil? && !pvResourceUtilMetricRecord.empty? && !pvResourceUtilMetricRecord[0].nil? && !pvResourceUtilMetricRecord[0].empty?
+          records.push(pvResourceUtilMetricRecord[0])
+        end
       rescue => errorStr
         @log.info "Error in zeroFillMetricRecords: #{errorStr}"
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
@@ -189,10 +205,13 @@ def zeroFillMetricRecords(records, batch_time)
     def appendAllPodMetrics(records, batch_time)
       begin
         @log.info "in appendAllPodMetrics..."
-        if @sendZeroFilledMetrics == true
+        timeDifference = (DateTime.now.to_time.to_i - @zeroFilledMetricsTimeTracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if @sendZeroFilledMetrics == true || (timeDifferenceInMinutes >= Constants::ZERO_FILL_METRICS_INTERVAL_IN_MINUTES)
           records = zeroFillMetricRecords(records, batch_time)
           # Setting it to false after startup
           @sendZeroFilledMetrics = false
+          @zeroFilledMetricsTimeTracker = DateTime.now.to_time.to_i
         end
         records = appendPodMetrics(records,
                                    Constants::MDM_OOM_KILLED_CONTAINER_COUNT,
@@ -325,22 +344,22 @@ def getMetricRecords(record)
       begin
         dimNames = String.new "" #mutable string
         dimValues = String.new ""
-        noDimVal ="-"
+        noDimVal = "-"
         metricValue = 0
         if !record["tags"].nil?
-            dimCount = 0
-            record["tags"].each { |k, v| 
-            dimCount = dimCount+1
-              if (dimCount <= 10) #MDM = 10 dims
-                dimNames.concat("\"#{k}\"")
-                dimNames.concat(",")
-                if !v.nil? && v.length >0
-                  dimValues.concat("\"#{v}\"")
-                else
-                  dimValues.concat("\"#{noDimVal}\"")
-                end
-                dimValues.concat(",")
+          dimCount = 0
+          record["tags"].each { |k, v|
+            dimCount = dimCount + 1
+            if (dimCount <= 10) #MDM = 10 dims
+              dimNames.concat("\"#{k}\"")
+              dimNames.concat(",")
+              if !v.nil? && v.length > 0
+                dimValues.concat("\"#{v}\"")
+              else
+                dimValues.concat("\"#{noDimVal}\"")
               end
+              dimValues.concat(",")
+            end
           }
           if (dimNames.end_with?(","))
             dimNames.chomp!(",")
@@ -353,19 +372,19 @@ def getMetricRecords(record)
         convertedTimestamp = Time.at(timestamp.to_i).utc.iso8601
         if !record["fields"].nil?
           record["fields"].each { |k, v|
-          if is_numeric(v)
-            metricRecord = MdmAlertTemplates::Generic_metric_template % {
-              timestamp: convertedTimestamp,
-              metricName: k,
-              namespaceSuffix: record["name"],
-              dimNames: dimNames,
-              dimValues: dimValues,
-              metricValue: v,
-            }
-            records.push(Yajl::Parser.parse(StringIO.new(metricRecord)))
-            #@log.info "pushed mdmgenericmetric: #{k},#{v}"
-          end
-            }
+            if is_numeric(v)
+              metricRecord = MdmAlertTemplates::Generic_metric_template % {
+                timestamp: convertedTimestamp,
+                metricName: k,
+                namespaceSuffix: record["name"],
+                dimNames: dimNames,
+                dimValues: dimValues,
+                metricValue: v,
+              }
+              records.push(Yajl::Parser.parse(StringIO.new(metricRecord)))
+              #@log.info "pushed mdmgenericmetric: #{k},#{v}"
+            end
+          }
         end
       rescue => errorStr
         @log.info "getMetricRecords:Error: #{errorStr} for record #{record}"
@@ -375,7 +394,7 @@ def getMetricRecords(record)
     end
 
     def is_numeric(o)
-        true if Float(o) rescue false
+      true if Float(o) rescue false
     end
 
     def getContainerResourceUtilizationThresholds
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 73e3af471..be1a9de64 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -57,36 +57,37 @@ class Constants
     MDM_NODE_MEMORY_RSS_PERCENTAGE = "memoryRssPercentage"
     MDM_NODE_MEMORY_WORKING_SET_PERCENTAGE = "memoryWorkingSetPercentage"
 
-    CONTAINER_TERMINATED_RECENTLY_IN_MINUTES = 5
-    OBJECT_NAME_K8S_CONTAINER = "K8SContainer"
-    OBJECT_NAME_K8S_NODE = "K8SNode"
-    CPU_USAGE_NANO_CORES = "cpuUsageNanoCores"
-    CPU_USAGE_MILLI_CORES = "cpuUsageMillicores"
-    MEMORY_WORKING_SET_BYTES= "memoryWorkingSetBytes"
-    MEMORY_RSS_BYTES = "memoryRssBytes"
-    PV_USED_BYTES = "pvUsedBytes"
-    DEFAULT_MDM_CPU_UTILIZATION_THRESHOLD = 95.0
-    DEFAULT_MDM_MEMORY_RSS_THRESHOLD = 95.0
-    DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD = 95.0
-    DEFAULT_MDM_PV_UTILIZATION_THRESHOLD = 60.0
-    CONTROLLER_KIND_JOB = "job"
-    CONTAINER_TERMINATION_REASON_COMPLETED = "completed"
-    CONTAINER_STATE_TERMINATED = "terminated"
-    STALE_JOB_TIME_IN_MINUTES = 360
-    TELEGRAF_DISK_METRICS = "container.azm.ms/disk"
-    OMSAGENT_ZERO_FILL = "omsagent"
-    KUBESYSTEM_NAMESPACE_ZERO_FILL = "kube-system"
+  CONTAINER_TERMINATED_RECENTLY_IN_MINUTES = 5
+  OBJECT_NAME_K8S_CONTAINER = "K8SContainer"
+  OBJECT_NAME_K8S_NODE = "K8SNode"
+  CPU_USAGE_NANO_CORES = "cpuUsageNanoCores"
+  CPU_USAGE_MILLI_CORES = "cpuUsageMillicores"
+  MEMORY_WORKING_SET_BYTES = "memoryWorkingSetBytes"
+  MEMORY_RSS_BYTES = "memoryRssBytes"
+  PV_USED_BYTES = "pvUsedBytes"
+  DEFAULT_MDM_CPU_UTILIZATION_THRESHOLD = 95.0
+  DEFAULT_MDM_MEMORY_RSS_THRESHOLD = 95.0
+  DEFAULT_MDM_MEMORY_WORKING_SET_THRESHOLD = 95.0
+  DEFAULT_MDM_PV_UTILIZATION_THRESHOLD = 60.0
+  CONTROLLER_KIND_JOB = "job"
+  CONTAINER_TERMINATION_REASON_COMPLETED = "completed"
+  CONTAINER_STATE_TERMINATED = "terminated"
+  STALE_JOB_TIME_IN_MINUTES = 360
+  TELEGRAF_DISK_METRICS = "container.azm.ms/disk"
+  OMSAGENT_ZERO_FILL = "omsagent"
+  KUBESYSTEM_NAMESPACE_ZERO_FILL = "kube-system"
 
-    #Telemetry constants
-    CONTAINER_METRICS_HEART_BEAT_EVENT = "ContainerMetricsMdmHeartBeatEvent"
-    POD_READY_PERCENTAGE_HEART_BEAT_EVENT = "PodReadyPercentageMdmHeartBeatEvent"
-    CONTAINER_RESOURCE_UTIL_HEART_BEAT_EVENT = "ContainerResourceUtilMdmHeartBeatEvent"
-    PV_USAGE_HEART_BEAT_EVENT = "PVUsageMdmHeartBeatEvent"
-    PV_KUBE_SYSTEM_METRICS_ENABLED_EVENT = "CollectPVKubeSystemMetricsEnabled"
-    TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 10
-    KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
-    MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
+  #Telemetry constants
+  CONTAINER_METRICS_HEART_BEAT_EVENT = "ContainerMetricsMdmHeartBeatEvent"
+  POD_READY_PERCENTAGE_HEART_BEAT_EVENT = "PodReadyPercentageMdmHeartBeatEvent"
+  CONTAINER_RESOURCE_UTIL_HEART_BEAT_EVENT = "ContainerResourceUtilMdmHeartBeatEvent"
+  PV_USAGE_HEART_BEAT_EVENT = "PVUsageMdmHeartBeatEvent"
+  PV_KUBE_SYSTEM_METRICS_ENABLED_EVENT = "CollectPVKubeSystemMetricsEnabled"
+  TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 10
+  KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
+  ZERO_FILL_METRICS_INTERVAL_IN_MINUTES = 30
+  MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
 
-    #Pod Statuses
-    POD_STATUS_TERMINATING = "Terminating"
-end
\ No newline at end of file
+  #Pod Statuses
+  POD_STATUS_TERMINATING = "Terminating"
+end

From f1657c65f2408bfd66a45cfa54c2d8a27770ac6a Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 30 Sep 2020 18:13:25 -0700
Subject: [PATCH 29/60] Gangams/arc k8s related scripts, charts and doc updates
 (#450)

* checksum annotations

* script update for chart from mcr

* chart updates

* update chart version to match with chart release

* script updates

* latest chart updates

* version updates for chart release

* script updates

* script updates

* doc updates

* doc updates

* update comments

* fix bug in ps script

* fix bug in ps script

* minor update

* release process updates

* use consistent name across scripts

* use consistent names
---
 ....sh => push-helm-chart-to-canary-repos.sh} |  24 +-
 .pipelines/push-helm-chart-to-prod-repos.sh   |  53 ++
 ReleaseProcess.md                             |   5 +-
 charts/azuremonitor-containers/Chart.yaml     |   2 +-
 .../templates/omsagent-daemonset-windows.yaml |   2 +
 .../templates/omsagent-daemonset.yaml         |   3 +
 .../templates/omsagent-deployment.yaml        |   3 +
 charts/azuremonitor-containers/values.yaml    |   6 +-
 .../onboarding/managed/disable-monitoring.ps1 |  12 +-
 .../onboarding/managed/disable-monitoring.sh  |  10 +-
 .../onboarding/managed/enable-monitoring.ps1  | 118 ++--
 .../onboarding/managed/enable-monitoring.sh   | 552 +++++++++---------
 .../onboarding/managed/upgrade-monitoring.sh  | 314 ++++++++++
 13 files changed, 733 insertions(+), 371 deletions(-)
 rename .pipelines/{push-helm-chart-as-oci-artifact.sh => push-helm-chart-to-canary-repos.sh} (54%)
 create mode 100644 .pipelines/push-helm-chart-to-prod-repos.sh
 create mode 100644 scripts/onboarding/managed/upgrade-monitoring.sh

diff --git a/.pipelines/push-helm-chart-as-oci-artifact.sh b/.pipelines/push-helm-chart-to-canary-repos.sh
similarity index 54%
rename from .pipelines/push-helm-chart-as-oci-artifact.sh
rename to .pipelines/push-helm-chart-to-canary-repos.sh
index 50e16e3d0..db8bff56e 100644
--- a/.pipelines/push-helm-chart-as-oci-artifact.sh
+++ b/.pipelines/push-helm-chart-to-canary-repos.sh
@@ -1,8 +1,9 @@
 #!/bin/bash
-# push the helm chart as an OCI artifact to specified ACR
 # working directory of this script should be charts/azuremonitor-containers
 
-export REPO_PATH="batch1/test/azure-monitor-containers"
+# note: this repo is registered in the arc k8s extension for the canary region
+export REPO_PATH="public/azuremonitor/containerinsights/canary/preview/azuremonitor-containers"
+
 export  HELM_EXPERIMENTAL_OCI=1
 
 for ARGUMENT in "$@"
@@ -11,13 +12,13 @@ do
    VALUE=$(echo $ARGUMENT | cut -f2 -d=)
 
    case "$KEY" in
-           CIARCACR) CIARCACR=$VALUE ;;
+           CIACR) CIACR=$VALUE ;;
            CICHARTVERSION) CHARTVERSION=$VALUE ;;
            *)
     esac
 done
 
-echo "CI ARC K8S ACR: ${CIARCACR}"
+echo "CI ARC K8S ACR: ${CIACR}"
 echo "CI HELM CHART VERSION: ${CHARTVERSION}"
 
 echo "start: read appid and appsecret"
@@ -25,18 +26,19 @@ ACR_APP_ID=$(cat ~/acrappid)
 ACR_APP_SECRET=$(cat ~/acrappsecret)
 echo "end: read appid and appsecret"
 
-ACR=${CIARCACR}
+ACR=${CIACR}
+
+echo "login to acr:${ACR} using helm"
+helm registry login $ACR  --username $ACR_APP_ID --password $ACR_APP_SECRET
 
-echo "login to acr:${ACR} using oras"
-oras login $ACR  --username $ACR_APP_ID --password $ACR_APP_SECRET
 echo "login to acr:${ACR} completed: ${ACR}"
 
 echo "start: push the chart version: ${CHARTVERSION} to acr repo: ${ACR}"
 
-echo "generate helm package"
-helm package .
+echo "save the chart locally with acr full path"
+helm chart save . ${ACR}/${REPO_PATH}:${CHARTVERSION}
 
-echo "pushing the helm chart as an OCI artifact"
-oras push ${ACR}/${REPO_PATH}:${CHARTVERSION} --manifest-config /dev/null:application/vnd.unknown.config.v1+json  ./azuremonitor-containers-${CHARTVERSION}.tgz:application/tar+gzip
+echo "pushing the helm chart to ACR: ${ACR}"
+helm chart push ${ACR}/${REPO_PATH}:${CHARTVERSION}
 
 echo "end: push the chart version: ${CHARTVERSION} to acr repo: ${ACR}"
diff --git a/.pipelines/push-helm-chart-to-prod-repos.sh b/.pipelines/push-helm-chart-to-prod-repos.sh
new file mode 100644
index 000000000..71aa989de
--- /dev/null
+++ b/.pipelines/push-helm-chart-to-prod-repos.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# working directory of this script should be charts/azuremonitor-containers
+
+# note: this repo is used for the public preview release without the arc k8s extension
+export PROD_REPO_PATH="public/azuremonitor/containerinsights/preview/azuremonitor-containers"
+
+# note: this repo is registered in the arc k8s extension for the prod group1 regions.
+export EXTENSION_PROD_REPO_PATH="public/azuremonitor/containerinsights/prod1/preview/azuremonitor-containers"
+
+export  HELM_EXPERIMENTAL_OCI=1
+
+for ARGUMENT in "$@"
+do
+   KEY=$(echo $ARGUMENT | cut -f1 -d=)
+   VALUE=$(echo $ARGUMENT | cut -f2 -d=)
+
+   case "$KEY" in
+           CIACR) CIACR=$VALUE ;;
+           CICHARTVERSION) CHARTVERSION=$VALUE ;;
+           *)
+    esac
+done
+
+echo "CI ARC K8S ACR: ${CIACR}"
+echo "CI HELM CHART VERSION: ${CHARTVERSION}"
+
+echo "start: read appid and appsecret"
+ACR_APP_ID=$(cat ~/acrappid)
+ACR_APP_SECRET=$(cat ~/acrappsecret)
+echo "end: read appid and appsecret"
+
+ACR=${CIACR}
+
+echo "login to acr:${ACR} using helm"
+helm registry login $ACR  --username $ACR_APP_ID --password $ACR_APP_SECRET
+
+echo "login to acr:${ACR} completed: ${ACR}"
+
+echo "start: push the chart version: ${CHARTVERSION} to acr repo: ${ACR}"
+
+echo "save the chart locally with acr full path: ${ACR}/${EXTENSION_PROD_REPO_PATH}:${CHARTVERSION}"
+helm chart save . ${ACR}/${EXTENSION_PROD_REPO_PATH}:${CHARTVERSION}
+
+echo "save the chart locally with acr full path: ${ACR}/${PROD_REPO_PATH}:${CHARTVERSION}"
+helm chart save . ${ACR}/${PROD_REPO_PATH}:${CHARTVERSION}
+
+echo "pushing the helm chart to ACR: ${ACR}/${EXTENSION_PROD_REPO_PATH}:${CHARTVERSION}"
+helm chart push ${ACR}/${EXTENSION_PROD_REPO_PATH}:${CHARTVERSION}
+
+echo "pushing the helm chart to ACR: ${ACR}/${PROD_REPO_PATH}:${CHARTVERSION}"
+helm chart push ${ACR}/${PROD_REPO_PATH}:${CHARTVERSION}
+
+echo "end: push the chart version: ${CHARTVERSION} to acr repo: ${ACR}"
diff --git a/ReleaseProcess.md b/ReleaseProcess.md
index 19802e22c..2a3e6001a 100644
--- a/ReleaseProcess.md
+++ b/ReleaseProcess.md
@@ -45,7 +45,10 @@ Make PR against [AKS-Engine](https://github.com/Azure/aks-engine). Refer PR http
 
 ## ARO v4, On-prem K8s, Azure Arc K8s and OpenShift v4 clusters
 
-Make PR against [HELM-charts](https://github.com/helm/charts) with Azure Monitor for containers chart update.
+Make sure the azuremonitor-containers chart YAMLs are updated with all the changes going into the release, and make sure to bump the chart version, image tag, docker provider version, etc. As with the agent container image, the build pipeline automatically pushes the chart to the Container Insights prod ACR, to the canary and prod repos accordingly.
+Both the agent and helm chart will be replicated to `mcr.microsoft.com`.
+
+Customers onboard monitoring to these clusters using the onboarding scripts under the `onboarding\managed` directory, so please bump the chart version in those scripts for the prod release. Once we move to the Arc K8s Monitoring extension public preview, this will be taken care of, so at that point no manual changes like this will be required.
 
 # 4. Monitor agent roll-out status
 
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index 8976b5561..1d3fed86f 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 appVersion: 7.0.0-1
 description: Helm chart for deploying Azure Monitor container monitoring agent in Kubernetes
 name: azuremonitor-containers
-version: 2.7.4
+version: 2.7.6
 kubeVersion: "^1.10.0-0"
 keywords:
   - monitoring
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index 72b09f6c1..e65f9a98d 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -24,6 +24,8 @@ spec:
     agentVersion: {{ .Values.omsagent.image.tagWindows }}
     dockerProviderVersion: {{ .Values.omsagent.image.dockerProviderVersion }}
     schema-versions: "v1"
+    checksum/secret: {{ include (print $.Template.BasePath "/omsagent-secret.yaml") . | sha256sum }}
+    checksum/config: {{ toYaml .Values.omsagent.resources | sha256sum }}
   spec:
 {{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
    nodeSelector:
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
index 7514247a0..438294ce5 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
@@ -24,6 +24,9 @@ spec:
     agentVersion: {{ .Values.omsagent.image.tag }}
     dockerProviderVersion: {{ .Values.omsagent.image.dockerProviderVersion }}
     schema-versions: "v1"
+    checksum/secret: {{ include (print $.Template.BasePath "/omsagent-secret.yaml") . | sha256sum }}
+    checksum/config: {{ toYaml .Values.omsagent.resources | sha256sum }}
+    checksum/logsettings: {{ toYaml .Values.omsagent.logsettings | sha256sum }}
   spec:
    {{- if .Values.omsagent.rbac }}
    serviceAccountName: omsagent
diff --git a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml
index 7d7ac7040..8609d25c9 100644
--- a/charts/azuremonitor-containers/templates/omsagent-deployment.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-deployment.yaml
@@ -25,6 +25,9 @@ spec:
     agentVersion: {{ .Values.omsagent.image.tag }}
     dockerProviderVersion: {{ .Values.omsagent.image.dockerProviderVersion }}
     schema-versions: "v1"
+    checksum/secret: {{ include (print $.Template.BasePath "/omsagent-secret.yaml") . | sha256sum }}
+    checksum/config: {{ toYaml .Values.omsagent.resources | sha256sum }}
+    checksum/logsettings: {{ toYaml .Values.omsagent.logsettings | sha256sum }}
   spec:
   {{- if .Values.omsagent.rbac }}
    serviceAccountName: omsagent
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 1804d1197..2711cb372 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -12,10 +12,10 @@ Azure:
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod09162020"
-    tagWindows: "win-ciprod09162020"
+    tag: "ciprod09252020"
+    tagWindows: "win-ciprod09252020"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "10.0.0-5"
+    dockerProviderVersion: "10.0.0-6"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/scripts/onboarding/managed/disable-monitoring.ps1 b/scripts/onboarding/managed/disable-monitoring.ps1
index 8945f90b6..1c011bfff 100644
--- a/scripts/onboarding/managed/disable-monitoring.ps1
+++ b/scripts/onboarding/managed/disable-monitoring.ps1
@@ -1,12 +1,12 @@
 <#
     .DESCRIPTION
 
-     Disables Azure Monitor for containers to monitoring enabled Azure Managed K8s cluster such as Azure Arc K8s, ARO v4 and AKS etc.
+     Disables Azure Monitor for containers to monitoring enabled Azure Managed K8s cluster such as Azure Arc enabled Kubernetes, ARO v4 and AKS etc.
        1. Deletes the existing Azure Monitor for containers helm release
        2. Deletes logAnalyticsWorkspaceResourceId tag on the provided Managed cluster
 
     .PARAMETER clusterResourceId
-        Id of the Azure Managed Cluster such as Azure ARC K8s, ARO v4 etc.
+        Id of the Azure Managed Cluster such as Azure Arc enabled Kubernetes, ARO v4 etc.
     .PARAMETER servicePrincipalClientId
         client Id of the service principal which will be used for the azure login
     .PARAMETER servicePrincipalClientSecret
@@ -18,7 +18,7 @@
 
     Pre-requisites:
       -  Azure Managed cluster Resource Id
-      -  Contributor role permission on the Subscription of the Azure Arc Cluster
+      -  Contributor role permission on the Subscription of the Azure Arc enabled Kubernetes Cluster
       -  Helm v3.0.0 or higher  https://github.com/helm/helm/releases
       -  kube-context of the K8s cluster
  Note: 1. Please make sure you have all the pre-requisistes before running this script.
@@ -298,7 +298,7 @@ if ($isArcK8sCluster -eq $true) {
    # validate identity
    $clusterIdentity = $clusterResource.identity.type.ToString().ToLower()
    if ($clusterIdentity.Contains("systemassigned") -eq $false) {
-     Write-Host("Identity of Azure Arc K8s cluster should be systemassigned but it has identity: $clusterIdentity") -ForegroundColor Red
+     Write-Host("Identity of Azure Arc enabled Kubernetes cluster should be systemassigned but it has identity: $clusterIdentity") -ForegroundColor Red
      exit
    }
 }
@@ -354,7 +354,3 @@ catch {
 }
 
 Write-Host("Successfully disabled Azure Monitor for containers for cluster: $clusteResourceId") -ForegroundColor Green
-
-
-
-
diff --git a/scripts/onboarding/managed/disable-monitoring.sh b/scripts/onboarding/managed/disable-monitoring.sh
index f20bd7d33..c11426f30 100644
--- a/scripts/onboarding/managed/disable-monitoring.sh
+++ b/scripts/onboarding/managed/disable-monitoring.sh
@@ -26,10 +26,10 @@ set -o pipefail
 
 # default release name used during onboarding
 releaseName="azmon-containers-release-1"
-# resource type for azure arc clusters
+# resource type for Azure Arc enabled Kubernetes clusters
 resourceProvider="Microsoft.Kubernetes/connectedClusters"
 
-# resource provider for azure arc connected cluster
+# resource provider for Azure Arc enabled Kubernetes cluster
 arcK8sResourceProvider="Microsoft.Kubernetes/connectedClusters"
 # resource provider for azure redhat openshift v4 cluster
 aroV4ResourceProvider="Microsoft.RedHatOpenShift/OpenShiftClusters"
@@ -125,13 +125,13 @@ remove_monitoring_tags()
   echo "set the cluster subscription id: ${clusterSubscriptionId}"
   az account set -s ${clusterSubscriptionId}
 
-  # validate cluster identity for ARC k8s cluster
+  # validate cluster identity for Azure Arc enabled Kubernetes cluster
   if [ "$isArcK8sCluster" = true ] ; then
    identitytype=$(az resource show -g ${clusterResourceGroup} -n ${clusterName} --resource-type $resourceProvider --query identity.type)
    identitytype=$(echo $identitytype | tr "[:upper:]" "[:lower:]" | tr -d '"')
    echo "cluster identity type:" $identitytype
     if [[ "$identitytype" != "systemassigned" ]]; then
-      echo "-e only supported cluster identity is systemassigned for Azure ARC K8s cluster type"
+      echo "-e only supported cluster identity is systemassigned for Azure Arc enabled Kubernetes cluster type"
       exit 1
     fi
   fi
@@ -257,7 +257,7 @@ done
 
  # detect the resource provider from the provider name in the cluster resource id
  if [ $providerName = "microsoft.kubernetes/connectedclusters" ]; then
-    echo "provider cluster resource is of Azure ARC K8s cluster type"
+    echo "provider cluster resource is of Azure Arc enabled Kubernetes cluster type"
     isArcK8sCluster=true
     resourceProvider=$arcK8sResourceProvider
  elif [ $providerName = "microsoft.redhatopenshift/openshiftclusters" ]; then
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index 338de6cbc..1e1669400 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -1,14 +1,14 @@
 <#
     .DESCRIPTION
 
-     Onboards Azure Monitor for containers to Azure Managed Kuberenetes such as Azure Arc K8s, ARO v4 and AKS etc.
+     Onboards Azure Monitor for containers to Azure Managed Kubernetes such as Azure Arc enabled Kubernetes, ARO v4 and AKS etc.
        1. Creates the Default Azure log analytics workspace if doesn't exist one in specified subscription
        2. Adds the ContainerInsights solution to the Azure log analytics workspace
        3. Adds the workspaceResourceId tag or enable addon (if the cluster is AKS) on the provided Managed cluster resource id
        4. Installs Azure Monitor for containers HELM chart to the K8s cluster in provided via --kube-context
 
     .PARAMETER clusterResourceId
-        Id of the Azure Managed Cluster such as Azure ARC K8s, ARO v4 etc.
+        Id of the Azure Managed Cluster such as Azure Arc enabled Kubernetes, ARO v4 etc.
     .PARAMETER servicePrincipalClientId
         Client Id of the service principal which will be used for the azure login
     .PARAMETER servicePrincipalClientSecret
@@ -22,10 +22,6 @@
     .PARAMETER proxyEndpoint (optional)
         Provide Proxy endpoint if you have K8s cluster behind the proxy and would like to route Azure Monitor for containers outbound traffic via proxy.
         Format of the proxy endpoint should be http(s://<user>:<password>@<proxyhost>:<port>
-    .PARAMETER helmRepoName (optional)
-        helm repo name. should be used only for the private preview features
-    .PARAMETER helmRepoUrl (optional)
-        helm repo url. should be used only for the private preview features
 
      Pre-requisites:
       -  Azure Managed cluster Resource Id
@@ -50,30 +46,23 @@ param(
     [Parameter(mandatory = $false)]
     [string]$workspaceResourceId,
     [Parameter(mandatory = $false)]
-    [string]$proxyEndpoint,
-    [Parameter(mandatory = $false)]
-    [string]$helmRepoName,
-    [Parameter(mandatory = $false)]
-    [string]$helmRepoUrl
+    [string]$proxyEndpoint
 )
 
-$solutionTemplateUri= "https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_dev/scripts/onboarding/templates/azuremonitor-containerSolution.json"
+$solutionTemplateUri = "https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_dev/scripts/onboarding/templates/azuremonitor-containerSolution.json"
 $helmChartReleaseName = "azmon-containers-release-1"
 $helmChartName = "azuremonitor-containers"
-$helmChartRepoName = "incubator"
-$helmChartRepoUrl = "https://kubernetes-charts-incubator.storage.googleapis.com/"
+
 # flags to indicate the cluster types
 $isArcK8sCluster = $false
-$isAksCluster =  $false
+$isAksCluster = $false
 $isUsingServicePrincipal = $false
 
-if([string]::IsNullOrEmpty($helmRepoName) -eq $false){
-    $helmChartRepoName = $helmRepoName
-}
-
-if([string]::IsNullOrEmpty($helmRepoUrl) -eq $false){
-    $helmChartRepoUrl = $helmRepoUrl
-}
+# released chart version in mcr
+$mcr = "mcr.microsoft.com"
+$mcrChartVersion = "2.7.6"
+$mcrChartRepoPath = "azuremonitor/containerinsights/preview/azuremonitor-containers"
+$helmLocalRepoName = "."
 
 # checks the required Powershell modules exist and if not exists, request the user permission to install
 $azAccountModule = Get-Module -ListAvailable -Name Az.Accounts
@@ -200,7 +189,7 @@ if (($null -eq $azAccountModule) -or ($null -eq $azResourcesModule) -or ($null -
 }
 
 if ([string]::IsNullOrEmpty($clusterResourceId)) {
-    Write-Host("Specified Azure Arc ClusterResourceId should not be NULL or empty") -ForegroundColor Red
+    Write-Host("Specified Azure Arc enabled Kubernetes ClusterResourceId should not be NULL or empty") -ForegroundColor Red
     exit
 }
 
@@ -220,30 +209,31 @@ if ($clusterResourceId.StartsWith("/") -eq $false) {
     $clusterResourceId = "/" + $clusterResourceId
 }
 
-if ($clusterResourceId.Split("/").Length -ne 9){
-     Write-Host("Provided Cluster Resource Id is not in expected format") -ForegroundColor Red
+if ($clusterResourceId.Split("/").Length -ne 9) {
+    Write-Host("Provided Cluster Resource Id is not in expected format") -ForegroundColor Red
     exit
 }
 
 if (($clusterResourceId.ToLower().Contains("microsoft.kubernetes/connectedclusters") -ne $true) -and
     ($clusterResourceId.ToLower().Contains("microsoft.redhatopenshift/openshiftclusters") -ne $true) -and
     ($clusterResourceId.ToLower().Contains("microsoft.containerservice/managedclusters") -ne $true)
-  ) {
+) {
     Write-Host("Provided cluster ResourceId is not supported cluster type: $clusterResourceId") -ForegroundColor Red
     exit
 }
 
-if(([string]::IsNullOrEmpty($servicePrincipalClientId) -eq $false) -and
-   ([string]::IsNullOrEmpty($servicePrincipalClientSecret) -eq $false) -and
-   ([string]::IsNullOrEmpty($tenantId) -eq $false)) {
-   Write-Host("Using service principal creds for the azure login since these provided.")
-   $isUsingServicePrincipal = $true
+if (([string]::IsNullOrEmpty($servicePrincipalClientId) -eq $false) -and
+    ([string]::IsNullOrEmpty($servicePrincipalClientSecret) -eq $false) -and
+    ([string]::IsNullOrEmpty($tenantId) -eq $false)) {
+    Write-Host("Using service principal creds for the azure login since these provided.")
+    $isUsingServicePrincipal = $true
 }
 
 if ($clusterResourceId.ToLower().Contains("microsoft.kubernetes/connectedclusters") -eq $true) {
-   $isArcK8sCluster = $true
-} elseif ($clusterResourceId.ToLower().Contains("microsoft.containerservice/managedclusters") -eq $true) {
-   $isAksCluster =  $true
+    $isArcK8sCluster = $true
+}
+elseif ($clusterResourceId.ToLower().Contains("microsoft.containerservice/managedclusters") -eq $true) {
+    $isAksCluster = $true
 }
 
 $resourceParts = $clusterResourceId.Split("/")
@@ -253,7 +243,7 @@ Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -Foregr
 
 if ($isUsingServicePrincipal) {
     $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
-    $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
+    $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId, $spSecret
     Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
 }
 
@@ -275,12 +265,13 @@ if ($null -eq $account.Account) {
     try {
         if ($isUsingServicePrincipal) {
             $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
-            $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
+            $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId, $spSecret
             Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
-        } else {
-         Write-Host("Please login...")
-         Connect-AzAccount -subscriptionid $clusterSubscriptionId
-       }
+        }
+        else {
+            Write-Host("Please login...")
+            Connect-AzAccount -subscriptionid $clusterSubscriptionId
+        }
     }
     catch {
         Write-Host("")
@@ -322,12 +313,12 @@ if ($null -eq $clusterResource) {
 $clusterRegion = $clusterResource.Location.ToLower()
 
 if ($isArcK8sCluster -eq $true) {
-   # validate identity
-   $clusterIdentity = $clusterResource.identity.type.ToString().ToLower()
-   if ($clusterIdentity.contains("systemassigned") -eq $false) {
-     Write-Host("Identity of Azure Arc K8s cluster should be systemassigned but it has identity: $clusterIdentity") -ForegroundColor Red
-     exit
-   }
+    # validate identity
+    $clusterIdentity = $clusterResource.identity.type.ToString().ToLower()
+    if ($clusterIdentity.contains("systemassigned") -eq $false) {
+        Write-Host("Identity of Azure Arc enabled Kubernetes cluster should be systemassigned but it has identity: $clusterIdentity") -ForegroundColor Red
+        exit
+    }
 }
 
 if ([string]::IsNullOrEmpty($workspaceResourceId)) {
@@ -514,7 +505,8 @@ if ($account.Subscription.Id -eq $clusterSubscriptionId) {
 if ($isAksCluster -eq $true) {
     Write-Host ("Enabling AKS Monitoring Addon ..")
     # TBD
-} else {
+}
+else {
     Write-Host("Attaching workspaceResourceId tag on the cluster ResourceId")
     $clusterResource.Tags["logAnalyticsWorkspaceResourceId"] = $WorkspaceInformation.ResourceId
     Set-AzResource -Tag $clusterResource.Tags -ResourceId $clusterResource.ResourceId -Force
@@ -526,20 +518,30 @@ Write-Host "Helm version" : $helmVersion
 Write-Host("Installing or upgrading if exists, Azure Monitor for containers HELM chart ...")
 try {
 
-    Write-Host("Adding $helmChartRepoName repo to helm: $helmChartRepoUrl")
-    helm repo add $helmChartRepoName $helmChartRepoUrl
-    Write-Host("updating helm repo to get latest version of charts")
-    helm repo update
+    Write-Host("enabling helm experimental OCI support (HELM_EXPERIMENTAL_OCI=1)")
+    [System.Environment]::SetEnvironmentVariable("HELM_EXPERIMENTAL_OCI", 1, "Process")
+
+    Write-Host("pull the chart from mcr.microsoft.com")
+    helm chart pull ${mcr}/${mcrChartRepoPath}:${mcrChartVersion}
+
+    Write-Host("export the chart from local cache to current directory")
+    helm chart export ${mcr}/${mcrChartRepoPath}:${mcrChartVersion} --destination .
+
+    $helmChartRepoPath = "${helmLocalRepoName}" + "/" + "${helmChartName}"
+
+    Write-Host("helmChartRepoPath is : ${helmChartRepoPath}")
+
     $helmParameters = "omsagent.secret.wsid=$workspaceGUID,omsagent.secret.key=$workspacePrimarySharedKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion"
-    if([string]::IsNullOrEmpty($proxyEndpoint) -eq $false) {
+    if ([string]::IsNullOrEmpty($proxyEndpoint) -eq $false) {
         Write-Host("using proxy endpoint since its provided")
         $helmParameters = $helmParameters + ",omsagent.proxy=$proxyEndpoint"
     }
     if ([string]::IsNullOrEmpty($kubeContext)) {
-        helm upgrade --install $helmChartReleaseName --set $helmParameters $helmChartRepoName/$helmChartName
-    } else {
-      Write-Host("using provided kube-context: $kubeContext")
-      helm upgrade --install $helmChartReleaseName --set $helmParameters $helmChartRepoName/$helmChartName --kube-context $kubeContext
+        helm upgrade --install $helmChartReleaseName --set $helmParameters $helmChartRepoPath
+    }
+    else {
+        Write-Host("using provided kube-context: $kubeContext")
+        helm upgrade --install $helmChartReleaseName --set $helmParameters $helmChartRepoPath --kube-context $kubeContext
     }
 }
 catch {
@@ -548,7 +550,3 @@ catch {
 
 Write-Host("Successfully enabled Azure Monitor for containers for cluster: $clusterResourceId") -ForegroundColor Green
 Write-Host("Proceed to https://aka.ms/azmon-containers to view your newly onboarded Azure Managed cluster") -ForegroundColor Green
-
-
-
-
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index 226fd978b..ce62a581a 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -41,9 +41,11 @@ set -o pipefail
 # default to public cloud since only supported cloud is azure public clod
 defaultAzureCloud="AzureCloud"
 
-# helm repo details
-helmRepoName="incubator"
-helmRepoUrl="https://kubernetes-charts-incubator.storage.googleapis.com/"
+# released chart version in mcr
+mcrChartVersion="2.7.6"
+mcr="mcr.microsoft.com"
+mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
+helmLocalRepoName="."
 helmChartName="azuremonitor-containers"
 
 # default release name used during onboarding
@@ -58,19 +60,18 @@ aroV4ResourceProvider="Microsoft.RedHatOpenShift/OpenShiftClusters"
 # resource provider for aks cluster
 aksResourceProvider="Microsoft.ContainerService/managedClusters"
 
-# default of resourceProvider is arc k8s and this will get updated based on the provider cluster resource
+# default of resourceProvider is Azure Arc enabled Kubernetes and this will get updated based on the provider cluster resource
 resourceProvider="Microsoft.Kubernetes/connectedClusters"
 
-
 # resource type for azure log analytics workspace
 workspaceResourceProvider="Microsoft.OperationalInsights/workspaces"
 
 # openshift project name for aro v4 cluster
 openshiftProjectName="azure-monitor-for-containers"
-# arc k8s cluster resource
+# AROv4 cluster resource
 isAroV4Cluster=false
 
-# arc k8s cluster resource
+# Azure Arc enabled Kubernetes cluster resource
 isArcK8sCluster=false
 
 # aks cluster resource
@@ -103,28 +104,25 @@ servicePrincipalClientSecret=""
 servicePrincipalTenantId=""
 isUsingServicePrincipal=false
 
-usage()
-{
-    local basename=`basename $0`
-    echo
-    echo "Enable Azure Monitor for containers:"
-    echo "$basename --resource-id <cluster resource id> [--client-id <clientId of service principal>] [--client-secret <client secret of service principal>] [--tenant-id <tenant id of the service principal>] [--kube-context <name of the kube context >] [--workspace-id <resource id of existing workspace>] [--proxy <proxy endpoint>]"
+usage() {
+  local basename=$(basename $0)
+  echo
+  echo "Enable Azure Monitor for containers:"
+  echo "$basename --resource-id <cluster resource id> [--client-id <clientId of service principal>] [--client-secret <client secret of service principal>] [--tenant-id <tenant id of the service principal>] [--kube-context <name of the kube context >] [--workspace-id <resource id of existing workspace>] [--proxy <proxy endpoint>]"
 }
 
-parse_args()
-{
+parse_args() {
 
- if [ $# -le 1 ]
-  then
+  if [ $# -le 1 ]; then
     usage
     exit 1
- fi
+  fi
 
-# Transform long options to short ones
-for arg in "$@"; do
-  shift
-  case "$arg" in
-    "--resource-id")  set -- "$@" "-r" ;;
+  # Transform long options to short ones
+  for arg in "$@"; do
+    shift
+    case "$arg" in
+    "--resource-id") set -- "$@" "-r" ;;
     "--kube-context") set -- "$@" "-k" ;;
     "--workspace-id") set -- "$@" "-w" ;;
     "--proxy") set -- "$@" "-p" ;;
@@ -134,130 +132,128 @@ for arg in "$@"; do
     "--helm-repo-name") set -- "$@" "-n" ;;
     "--helm-repo-url") set -- "$@" "-u" ;;
     "--container-log-volume") set -- "$@" "-v" ;;
-    "--"*)   usage ;;
-    *)        set -- "$@" "$arg"
-  esac
-done
+    "--"*) usage ;;
+    *) set -- "$@" "$arg" ;;
+    esac
+  done
 
-local OPTIND opt
+  local OPTIND opt
 
-while getopts 'hk:r:w:p:c:s:t:n:u:v:' opt; do
+  while getopts 'hk:r:w:p:c:s:t:n:u:v:' opt; do
     case "$opt" in
-      h)
+    h)
+      usage
+      ;;
+
+    k)
+      kubeconfigContext="$OPTARG"
+      echo "name of kube-context is $OPTARG"
+      ;;
+
+    r)
+      clusterResourceId="$OPTARG"
+      echo "clusterResourceId is $OPTARG"
+      ;;
+
+    w)
+      workspaceResourceId="$OPTARG"
+      echo "workspaceResourceId is $OPTARG"
+      ;;
+
+    p)
+      proxyEndpoint="$OPTARG"
+      echo "proxyEndpoint is $OPTARG"
+      ;;
+
+    c)
+      servicePrincipalClientId="$OPTARG"
+      echo "servicePrincipalClientId is $OPTARG"
+      ;;
+
+    s)
+      servicePrincipalClientSecret="$OPTARG"
+      echo "clientSecret is *****"
+      ;;
+
+    t)
+      servicePrincipalTenantId="$OPTARG"
+      echo "service principal tenantId is $OPTARG"
+      ;;
+
+    n)
+      helmRepoName="$OPTARG"
+      echo "helm repo name is $OPTARG"
+      ;;
+
+    u)
+      helmRepoUrl="$OPTARG"
+      echo "helm repo url is $OPTARG"
+      ;;
+
+    v)
+      containerLogVolume="$OPTARG"
+      echo "container log volume is $OPTARG"
+      ;;
+
+    ?)
       usage
-        ;;
-
-      k)
-        kubeconfigContext="$OPTARG"
-        echo "name of kube-context is $OPTARG"
-        ;;
-
-      r)
-        clusterResourceId="$OPTARG"
-        echo "clusterResourceId is $OPTARG"
-        ;;
-
-      w)
-        workspaceResourceId="$OPTARG"
-        echo "workspaceResourceId is $OPTARG"
-        ;;
-
-      p)
-        proxyEndpoint="$OPTARG"
-        echo "proxyEndpoint is $OPTARG"
-        ;;
-
-      c)
-        servicePrincipalClientId="$OPTARG"
-        echo "servicePrincipalClientId is $OPTARG"
-        ;;
-
-      s)
-        servicePrincipalClientSecret="$OPTARG"
-        echo "clientSecret is *****"
-        ;;
-
-      t)
-        servicePrincipalTenantId="$OPTARG"
-        echo "service principal tenantId is $OPTARG"
-        ;;
-
-      n)
-        helmRepoName="$OPTARG"
-        echo "helm repo name is $OPTARG"
-        ;;
-
-      u)
-        helmRepoUrl="$OPTARG"
-        echo "helm repo url is $OPTARG"
-        ;;
-
-      v)
-        containerLogVolume="$OPTARG"
-        echo "container log volume is $OPTARG"
-        ;;
-
-      ?)
-        usage
-        exit 1
-        ;;
+      exit 1
+      ;;
     esac
   done
-  shift "$(($OPTIND -1))"
+  shift "$(($OPTIND - 1))"
 
+  local subscriptionId="$(echo ${clusterResourceId} | cut -d'/' -f3)"
+  local resourceGroup="$(echo ${clusterResourceId} | cut -d'/' -f5)"
 
- local subscriptionId="$(echo ${clusterResourceId} | cut -d'/' -f3)"
- local resourceGroup="$(echo ${clusterResourceId} | cut -d'/' -f5)"
+  # get resource parts and join back to get the provider name
+  local providerNameResourcePart1="$(echo ${clusterResourceId} | cut -d'/' -f7)"
+  local providerNameResourcePart2="$(echo ${clusterResourceId} | cut -d'/' -f8)"
+  local providerName="$(echo ${providerNameResourcePart1}/${providerNameResourcePart2})"
 
- # get resource parts and join back to get the provider name
- local providerNameResourcePart1="$(echo ${clusterResourceId} | cut -d'/' -f7)"
- local providerNameResourcePart2="$(echo ${clusterResourceId} | cut -d'/' -f8)"
- local providerName="$(echo ${providerNameResourcePart1}/${providerNameResourcePart2} )"
+  local clusterName="$(echo ${clusterResourceId} | cut -d'/' -f9)"
 
- local clusterName="$(echo ${clusterResourceId} | cut -d'/' -f9)"
+  # convert to lowercase for validation
+  providerName=$(echo $providerName | tr "[:upper:]" "[:lower:]")
 
- # convert to lowercase for validation
- providerName=$(echo $providerName | tr "[:upper:]" "[:lower:]")
+  echo "cluster SubscriptionId:" $subscriptionId
+  echo "cluster ResourceGroup:" $resourceGroup
+  echo "cluster ProviderName:" $providerName
+  echo "cluster Name:" $clusterName
 
- echo "cluster SubscriptionId:" $subscriptionId
- echo "cluster ResourceGroup:" $resourceGroup
- echo "cluster ProviderName:" $providerName
- echo "cluster Name:" $clusterName
-
- if [ -z "$subscriptionId" -o -z "$resourceGroup" -o -z "$providerName" -o  -z "$clusterName" ]; then
+  if [ -z "$subscriptionId" -o -z "$resourceGroup" -o -z "$providerName" -o -z "$clusterName" ]; then
     echo "-e invalid cluster resource id. Please try with valid fully qualified resource id of the cluster"
     exit 1
- fi
+  fi
 
- if [[ $providerName != microsoft.* ]]; then
-   echo "-e invalid azure cluster resource id format."
-   exit 1
- fi
+  if [[ $providerName != microsoft.* ]]; then
+    echo "-e invalid azure cluster resource id format."
+    exit 1
+  fi
 
- # detect the resource provider from the provider name in the cluster resource id
- # detect the resource provider from the provider name in the cluster resource id
- if [ $providerName = "microsoft.kubernetes/connectedclusters" ]; then
-    echo "provider cluster resource is of Azure ARC K8s cluster type"
+  # detect the resource provider from the provider name in the cluster resource id
+  if [ $providerName = "microsoft.kubernetes/connectedclusters" ]; then
+    echo "provider cluster resource is of Azure Arc enabled Kubernetes cluster type"
     isArcK8sCluster=true
     resourceProvider=$arcK8sResourceProvider
- elif [ $providerName = "microsoft.redhatopenshift/openshiftclusters" ]; then
+  elif [ $providerName = "microsoft.redhatopenshift/openshiftclusters" ]; then
     echo "provider cluster resource is of AROv4 cluster type"
     resourceProvider=$aroV4ResourceProvider
     isAroV4Cluster=true
- elif [ $providerName = "microsoft.containerservice/managedclusters" ]; then
+  elif [ $providerName = "microsoft.containerservice/managedclusters" ]; then
     echo "provider cluster resource is of AKS cluster type"
     isAksCluster=true
     resourceProvider=$aksResourceProvider
- else
-   echo "-e unsupported azure managed cluster type"
-   exit 1
- fi
+  else
+    echo "-e unsupported azure managed cluster type"
+    exit 1
+  fi
 
- if [ -z "$kubeconfigContext" ]; then
+  if [ -z "$kubeconfigContext" ]; then
     echo "using or getting current kube config context since --kube-context parameter not set "
- fi
+  fi
 
-if [ ! -z "$workspaceResourceId" ]; then
+  if [ ! -z "$workspaceResourceId" ]; then
     local workspaceSubscriptionId="$(echo $workspaceResourceId | cut -d'/' -f3)"
     local workspaceResourceGroup="$(echo $workspaceResourceId | cut -d'/' -f5)"
     local workspaceProviderName="$(echo $workspaceResourceId | cut -d'/' -f7)"
@@ -269,13 +265,13 @@ if [ ! -z "$workspaceResourceId" ]; then
     echo "workspace ProviderName:" $workspaceName
     echo "workspace Name:" $workspaceName
 
-   if [[ $workspaceProviderName != microsoft.operationalinsights* ]]; then
-     echo "-e invalid azure log analytics resource id format."
-     exit 1
-   fi
-fi
+    if [[ $workspaceProviderName != microsoft.operationalinsights* ]]; then
+      echo "-e invalid azure log analytics resource id format."
+      exit 1
+    fi
+  fi
 
-if [ ! -z "$proxyEndpoint" ]; then
+  if [ ! -z "$proxyEndpoint" ]; then
     # Validate Proxy Endpoint URL
     # extract the protocol://
     proto="$(echo $proxyEndpoint | grep :// | sed -e's,^\(.*://\).*,\1,g')"
@@ -302,23 +298,21 @@ if [ ! -z "$proxyEndpoint" ]; then
     else
       echo "successfully validated provided proxy endpoint is valid and in expected format"
     fi
-fi
+  fi
 
-if [ ! -z "$servicePrincipalClientId" -a  ! -z "$servicePrincipalClientSecret"  -a  ! -z "$servicePrincipalTenantId" ]; then
-   echo "using service principal creds (clientId, secret and tenantId) for azure login since provided"
-   isUsingServicePrincipal=true
-fi
+  if [ ! -z "$servicePrincipalClientId" -a ! -z "$servicePrincipalClientSecret" -a ! -z "$servicePrincipalTenantId" ]; then
+    echo "using service principal creds (clientId, secret and tenantId) for azure login since provided"
+    isUsingServicePrincipal=true
+  fi
 
 }
 
-configure_to_public_cloud()
-{
+configure_to_public_cloud() {
   echo "Set AzureCloud as active cloud for az cli"
   az cloud set -n $defaultAzureCloud
 }
 
-validate_cluster_identity()
-{
+validate_cluster_identity() {
   echo "validating cluster identity"
 
   local rgName="$(echo ${1})"
@@ -329,15 +323,14 @@ validate_cluster_identity()
   echo "cluster identity type:" $identitytype
 
   if [[ "$identitytype" != "systemassigned" ]]; then
-     echo "-e only supported cluster identity is systemassigned for Azure ARC K8s cluster type"
-     exit 1
+    echo "-e only supported cluster identity is systemassigned for Azure Arc enabled Kubernetes cluster type"
+    exit 1
   fi
 
   echo "successfully validated the identity of the cluster"
 }
 
-create_default_log_analytics_workspace()
-{
+create_default_log_analytics_workspace() {
 
   # extract subscription from cluster resource id
   local subscriptionId="$(echo $clusterResourceId | cut -d'/' -f3)"
@@ -348,73 +341,71 @@ create_default_log_analytics_workspace()
 
   # mapping fors for default Azure Log Analytics workspace
   declare -A AzureCloudLocationToOmsRegionCodeMap=(
-  [australiasoutheast]=ASE
-  [australiaeast]=EAU
-  [australiacentral]=CAU
-  [canadacentral]=CCA
-  [centralindia]=CIN
-  [centralus]=CUS
-  [eastasia]=EA
-  [eastus]=EUS
-  [eastus2]=EUS2
-  [eastus2euap]=EAP
-  [francecentral]=PAR
-  [japaneast]=EJP
-  [koreacentral]=SE
-  [northeurope]=NEU
-  [southcentralus]=SCUS
-  [southeastasia]=SEA
-  [uksouth]=SUK
-  [usgovvirginia]=USGV
-  [westcentralus]=EUS
-  [westeurope]=WEU
-  [westus]=WUS
-  [westus2]=WUS2
+    [australiasoutheast]=ASE
+    [australiaeast]=EAU
+    [australiacentral]=CAU
+    [canadacentral]=CCA
+    [centralindia]=CIN
+    [centralus]=CUS
+    [eastasia]=EA
+    [eastus]=EUS
+    [eastus2]=EUS2
+    [eastus2euap]=EAP
+    [francecentral]=PAR
+    [japaneast]=EJP
+    [koreacentral]=SE
+    [northeurope]=NEU
+    [southcentralus]=SCUS
+    [southeastasia]=SEA
+    [uksouth]=SUK
+    [usgovvirginia]=USGV
+    [westcentralus]=EUS
+    [westeurope]=WEU
+    [westus]=WUS
+    [westus2]=WUS2
   )
 
   declare -A AzureCloudRegionToOmsRegionMap=(
-  [australiacentral]=australiacentral
-  [australiacentral2]=australiacentral
-  [australiaeast]=australiaeast
-  [australiasoutheast]=australiasoutheast
-  [brazilsouth]=southcentralus
-  [canadacentral]=canadacentral
-  [canadaeast]=canadacentral
-  [centralus]=centralus
-  [centralindia]=centralindia
-  [eastasia]=eastasia
-  [eastus]=eastus
-  [eastus2]=eastus2
-  [francecentral]=francecentral
-  [francesouth]=francecentral
-  [japaneast]=japaneast
-  [japanwest]=japaneast
-  [koreacentral]=koreacentral
-  [koreasouth]=koreacentral
-  [northcentralus]=eastus
-  [northeurope]=northeurope
-  [southafricanorth]=westeurope
-  [southafricawest]=westeurope
-  [southcentralus]=southcentralus
-  [southeastasia]=southeastasia
-  [southindia]=centralindia
-  [uksouth]=uksouth
-  [ukwest]=uksouth
-  [westcentralus]=eastus
-  [westeurope]=westeurope
-  [westindia]=centralindia
-  [westus]=westus
-  [westus2]=westus2
+    [australiacentral]=australiacentral
+    [australiacentral2]=australiacentral
+    [australiaeast]=australiaeast
+    [australiasoutheast]=australiasoutheast
+    [brazilsouth]=southcentralus
+    [canadacentral]=canadacentral
+    [canadaeast]=canadacentral
+    [centralus]=centralus
+    [centralindia]=centralindia
+    [eastasia]=eastasia
+    [eastus]=eastus
+    [eastus2]=eastus2
+    [francecentral]=francecentral
+    [francesouth]=francecentral
+    [japaneast]=japaneast
+    [japanwest]=japaneast
+    [koreacentral]=koreacentral
+    [koreasouth]=koreacentral
+    [northcentralus]=eastus
+    [northeurope]=northeurope
+    [southafricanorth]=westeurope
+    [southafricawest]=westeurope
+    [southcentralus]=southcentralus
+    [southeastasia]=southeastasia
+    [southindia]=centralindia
+    [uksouth]=uksouth
+    [ukwest]=uksouth
+    [westcentralus]=eastus
+    [westeurope]=westeurope
+    [westindia]=centralindia
+    [westus]=westus
+    [westus2]=westus2
   )
 
-  if [ -n "${AzureCloudRegionToOmsRegionMap[$clusterRegion]}" ];
-  then
+  if [ -n "${AzureCloudRegionToOmsRegionMap[$clusterRegion]}" ]; then
     workspaceRegion=${AzureCloudRegionToOmsRegionMap[$clusterRegion]}
   fi
   echo "Workspace Region:"$workspaceRegion
 
-  if [ -n "${AzureCloudLocationToOmsRegionCodeMap[$workspaceRegion]}" ];
-  then
+  if [ -n "${AzureCloudLocationToOmsRegionCodeMap[$workspaceRegion]}" ]; then
     workspaceRegionCode=${AzureCloudLocationToOmsRegionCodeMap[$workspaceRegion]}
   fi
   echo "Workspace Region Code:"$workspaceRegionCode
@@ -423,30 +414,28 @@ create_default_log_analytics_workspace()
   isRGExists=$(az group exists -g $workspaceResourceGroup)
   workspaceName="DefaultWorkspace-"$subscriptionId"-"$workspaceRegionCode
 
-  if $isRGExists
-  then echo "using existing default resource group:"$workspaceResourceGroup
+  if $isRGExists; then
+    echo "using existing default resource group:"$workspaceResourceGroup
   else
     echo "creating resource group: $workspaceResourceGroup in region: $workspaceRegion"
     az group create -g $workspaceResourceGroup -l $workspaceRegion
   fi
 
-  workspaceList=$(az resource list -g $workspaceResourceGroup -n $workspaceName  --resource-type $workspaceResourceProvider)
-  if [ "$workspaceList" = "[]" ];
-  then
-  # create new default workspace since no mapped existing default workspace
-  echo '{"location":"'"$workspaceRegion"'", "properties":{"sku":{"name": "standalone"}}}' > WorkspaceProps.json
-  cat WorkspaceProps.json
-  workspace=$(az resource create -g $workspaceResourceGroup -n $workspaceName --resource-type $workspaceResourceProvider --is-full-object -p @WorkspaceProps.json)
+  workspaceList=$(az resource list -g $workspaceResourceGroup -n $workspaceName --resource-type $workspaceResourceProvider)
+  if [ "$workspaceList" = "[]" ]; then
+    # create new default workspace since no mapped existing default workspace
+    echo '{"location":"'"$workspaceRegion"'", "properties":{"sku":{"name": "standalone"}}}' >WorkspaceProps.json
+    cat WorkspaceProps.json
+    workspace=$(az resource create -g $workspaceResourceGroup -n $workspaceName --resource-type $workspaceResourceProvider --is-full-object -p @WorkspaceProps.json)
   else
     echo "using existing default workspace:"$workspaceName
   fi
 
-  workspaceResourceId=$(az resource show -g $workspaceResourceGroup -n $workspaceName  --resource-type $workspaceResourceProvider --query id)
+  workspaceResourceId=$(az resource show -g $workspaceResourceGroup -n $workspaceName --resource-type $workspaceResourceProvider --query id)
   workspaceResourceId=$(echo $workspaceResourceId | tr -d '"')
 }
 
-add_container_insights_solution()
-{
+add_container_insights_solution() {
   local resourceId="$(echo ${1})"
 
   # extract resource group from workspace resource id
@@ -456,10 +445,9 @@ add_container_insights_solution()
   solution=$(az deployment group create -g $resourceGroup --template-uri $solutionTemplateUri --parameters workspaceResourceId=$resourceId --parameters workspaceRegion=$workspaceRegion)
 }
 
-get_workspace_guid_and_key()
-{
+get_workspace_guid_and_key() {
   # extract resource parts from workspace resource id
-  local resourceId="$(echo ${1} | tr -d '"' )"
+  local resourceId="$(echo ${1} | tr -d '"')"
   local subId="$(echo ${resourceId} | cut -d'/' -f3)"
   local rgName="$(echo ${resourceId} | cut -d'/' -f5)"
   local wsName="$(echo ${resourceId} | cut -d'/' -f9)"
@@ -474,11 +462,10 @@ get_workspace_guid_and_key()
   workspaceKey=$(echo $workspaceKey | tr -d '"')
 }
 
-install_helm_chart()
-{
+install_helm_chart() {
 
- # get the config-context for ARO v4 cluster
- if [ "$isAroV4Cluster" = true ] ; then
+  # get the config-context for ARO v4 cluster
+  if [ "$isAroV4Cluster" = true ]; then
     echo "getting config-context of ARO v4 cluster "
     echo "getting admin user creds for aro v4 cluster"
     adminUserName=$(az aro list-credentials -g $clusterResourceGroup -n $clusterName --query 'kubeadminUsername' -o tsv)
@@ -490,83 +477,84 @@ install_helm_chart()
     oc new-project $openshiftProjectName
     echo "getting config-context of aro v4 cluster"
     kubeconfigContext=$(oc config current-context)
- fi
-
- if [ -z "$kubeconfigContext" ]; then
-     echo "installing Azure Monitor for containers HELM chart on to the cluster and using current kube context ..."
- else
-  echo "installing Azure Monitor for containers HELM chart on to the cluster with kubecontext:${kubeconfigContext} ..."
- fi
-
- echo "getting the region of the cluster"
- clusterRegion=$(az resource show --ids ${clusterResourceId} --query location -o tsv)
- echo "cluster region is : ${clusterRegion}"
-
- echo "adding helm repo:" $helmRepoName
- helm repo add $helmRepoName $helmRepoUrl
-
- echo "updating helm repo to get latest charts"
- helm repo update
-
- if [ ! -z "$proxyEndpoint" ]; then
-   echo "using proxy endpoint since proxy configuration passed in"
-   if [ -z "$kubeconfigContext" ]; then
-     echo "using current kube-context since --kube-context/-k parameter not passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName
-   else
-     echo "using --kube-context:${kubeconfigContext} since passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName --kube-context ${kubeconfigContext}
-   fi
- else
-   if [ -z "$kubeconfigContext" ]; then
-     echo "using current kube-context since --kube-context/-k parameter not passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName
-   else
-     echo "using --kube-context:${kubeconfigContext} since passed in"
-     helm upgrade --install azmon-containers-release-1 --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmRepoName/$helmChartName --kube-context ${kubeconfigContext}
-   fi
- fi
-
- echo "chart installation completed."
+  fi
+
+  if [ -z "$kubeconfigContext" ]; then
+    echo "installing Azure Monitor for containers HELM chart on to the cluster and using current kube context ..."
+  else
+    echo "installing Azure Monitor for containers HELM chart on to the cluster with kubecontext:${kubeconfigContext} ..."
+  fi
+
+  echo "getting the region of the cluster"
+  clusterRegion=$(az resource show --ids ${clusterResourceId} --query location -o tsv)
+  echo "cluster region is : ${clusterRegion}"
+
+  echo "pull the chart version ${mcrChartVersion} from ${mcr}/${mcrChartRepoPath}"
+  export HELM_EXPERIMENTAL_OCI=1
+  helm chart pull $mcr/$mcrChartRepoPath:$mcrChartVersion
+
+  echo "export the chart from local cache to current directory"
+  helm chart export $mcr/$mcrChartRepoPath:$mcrChartVersion --destination .
+
+  helmChartRepoPath=$helmLocalRepoName/$helmChartName
+
+  echo "helm chart repo path: ${helmChartRepoPath}"
+
+  if [ ! -z "$proxyEndpoint" ]; then
+    echo "using proxy endpoint since proxy configuration passed in"
+    if [ -z "$kubeconfigContext" ]; then
+      echo "using current kube-context since --kube-context/-k parameter not passed in"
+      helm upgrade --install $releaseName --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath
+    else
+      echo "using --kube-context:${kubeconfigContext} since passed in"
+      helm upgrade --install $releaseName --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath --kube-context ${kubeconfigContext}
+    fi
+  else
+    if [ -z "$kubeconfigContext" ]; then
+      echo "using current kube-context since --kube-context/-k parameter not passed in"
+      helm upgrade --install $releaseName --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath
+    else
+      echo "using --kube-context:${kubeconfigContext} since passed in"
+      helm upgrade --install $releaseName --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath --kube-context ${kubeconfigContext}
+    fi
+  fi
+
+  echo "chart installation completed."
 
 }
 
-login_to_azure()
-{
-  if [ "$isUsingServicePrincipal" = true ] ; then
-     echo "login to the azure using provided service principal creds"
-     az login --service-principal --username $servicePrincipalClientId --password $servicePrincipalClientSecret --tenant $servicePrincipalTenantId
+login_to_azure() {
+  if [ "$isUsingServicePrincipal" = true ]; then
+    echo "login to the azure using provided service principal creds"
+    az login --service-principal --username $servicePrincipalClientId --password $servicePrincipalClientSecret --tenant $servicePrincipalTenantId
   else
     echo "login to the azure interactively"
     az login --use-device-code
   fi
 }
 
-set_azure_subscription()
-{
- local subscriptionId="$(echo ${1})"
- echo "setting the subscription id: ${subscriptionId} as current subscription for the azure cli"
- az account set -s ${subscriptionId}
- echo "successfully configured subscription id: ${subscriptionId} as current subscription for the azure cli"
+set_azure_subscription() {
+  local subscriptionId="$(echo ${1})"
+  echo "setting the subscription id: ${subscriptionId} as current subscription for the azure cli"
+  az account set -s ${subscriptionId}
+  echo "successfully configured subscription id: ${subscriptionId} as current subscription for the azure cli"
 }
 
-attach_monitoring_tags()
-{
+attach_monitoring_tags() {
   echo "attach loganalyticsworkspaceResourceId tag on to cluster resource"
-  status=$(az  resource update --set tags.logAnalyticsWorkspaceResourceId=$workspaceResourceId -g $clusterResourceGroup -n $clusterName --resource-type $resourceProvider)
+  status=$(az resource update --set tags.logAnalyticsWorkspaceResourceId=$workspaceResourceId -g $clusterResourceGroup -n $clusterName --resource-type $resourceProvider)
   echo "$status"
   echo "successfully attached logAnalyticsWorkspaceResourceId tag on the cluster resource"
 }
 
 # enables aks monitoring addon for private preview and dont use this for aks prod
-enable_aks_monitoring_addon()
-{
- echo "getting cluster object"
- clusterGetResponse=$(az rest --method get --uri $clusterResourceId?api-version=2020-03-01)
- export jqquery=".properties.addonProfiles.omsagent.config.logAnalyticsWorkspaceResourceID=\"$workspaceResourceId\""
- echo $clusterGetResponse | jq $jqquery > putrequestbody.json
- status=$(az rest --method put --uri $clusterResourceId?api-version=2020-03-01 --body @putrequestbody.json --headers Content-Type=application/json)
- echo "status after enabling of aks monitoringa addon:$status"
+enable_aks_monitoring_addon() {
+  echo "getting cluster object"
+  clusterGetResponse=$(az rest --method get --uri $clusterResourceId?api-version=2020-03-01)
+  export jqquery=".properties.addonProfiles.omsagent.config.logAnalyticsWorkspaceResourceID=\"$workspaceResourceId\""
+  echo $clusterGetResponse | jq $jqquery >putrequestbody.json
+  status=$(az rest --method put --uri $clusterResourceId?api-version=2020-03-01 --body @putrequestbody.json --headers Content-Type=application/json)
+  echo "status after enabling of aks monitoringa addon:$status"
 }
 
 # parse and validate args
@@ -587,9 +575,9 @@ login_to_azure
 # set the cluster subscription id as active sub for azure cli
 set_azure_subscription $clusterSubscriptionId
 
-# validate cluster identity if its ARC k8s cluster
-if [ "$isArcK8sCluster" = true ] ; then
-   validate_cluster_identity $clusterResourceGroup $clusterName
+# validate cluster identity if its Azure Arc enabled Kubernetes cluster
+if [ "$isArcK8sCluster" = true ]; then
+  validate_cluster_identity $clusterResourceGroup $clusterName
 fi
 
 if [ -z $workspaceResourceId ]; then
@@ -598,7 +586,7 @@ if [ -z $workspaceResourceId ]; then
 else
   echo "using provided azure log analytics workspace:${workspaceResourceId}"
   workspaceResourceId=$(echo $workspaceResourceId | tr -d '"')
-  workspaceSubscriptionId="$(echo ${workspaceResourceId} | cut -d'/' -f3 | tr "[:upper:]" "[:lower:]" )"
+  workspaceSubscriptionId="$(echo ${workspaceResourceId} | cut -d'/' -f3 | tr "[:upper:]" "[:lower:]")"
   workspaceResourceGroup="$(echo ${workspaceResourceId} | cut -d'/' -f5)"
   workspaceName="$(echo ${workspaceResourceId} | cut -d'/' -f9)"
 
@@ -620,13 +608,13 @@ add_container_insights_solution $workspaceResourceId
 # get workspace guid and key
 get_workspace_guid_and_key $workspaceResourceId
 
-if [ "$isClusterAndWorkspaceInSameSubscription" = false ] ; then
+if [ "$isClusterAndWorkspaceInSameSubscription" = false ]; then
   echo "switch to cluster subscription id as active subscription for cli: ${clusterSubscriptionId}"
   set_azure_subscription $clusterSubscriptionId
 fi
 
 # attach monitoring tags on to cluster resource
-if [ "$isAksCluster" = true ] ; then
+if [ "$isAksCluster" = true ]; then
   enable_aks_monitoring_addon
 else
   attach_monitoring_tags
diff --git a/scripts/onboarding/managed/upgrade-monitoring.sh b/scripts/onboarding/managed/upgrade-monitoring.sh
new file mode 100644
index 000000000..8a12b2f02
--- /dev/null
+++ b/scripts/onboarding/managed/upgrade-monitoring.sh
@@ -0,0 +1,314 @@
+#!/bin/bash
+#
+# Execute this directly in Azure Cloud Shell (https://shell.azure.com) by pasting (SHIFT+INS on Windows, CTRL+V on Mac or Linux)
+# the following line (beginning with curl...) at the command prompt and then replacing the args:
+#  This script upgrades the existing Azure Monitor for containers release on an Azure Arc enabled Kubernetes cluster
+#
+#  1. Upgrades the existing Azure Monitor for containers release on the K8s cluster provided via --kube-context
+# Prerequisites :
+#     Azure CLI:  https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest
+#     Helm3 : https://helm.sh/docs/intro/install/
+
+# download script
+# curl -o upgrade-monitoring.sh -L https://aka.ms/upgrade-monitoring-bash-script
+# 1. Using Service Principal for Azure Login
+# bash upgrade-monitoring.sh --resource-id <clusterResourceId> --client-id <sp client id> --client-secret <sp client secret> --tenant-id <tenant id of the service principal>
+# 2. Using Interactive device login
+# bash upgrade-monitoring.sh --resource-id <clusterResourceId>
+
+set -e
+set -o pipefail
+
+# released chart version for Azure Arc enabled Kubernetes public preview
+mcrChartVersion="2.7.6"
+mcr="mcr.microsoft.com"
+mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
+
+# default to public cloud since the only supported cloud is azure public cloud
+defaultAzureCloud="AzureCloud"
+helmLocalRepoName="."
+helmChartName="azuremonitor-containers"
+
+# default release name used during onboarding
+releaseName="azmon-containers-release-1"
+
+# resource provider for azure arc connected cluster
+arcK8sResourceProvider="Microsoft.Kubernetes/connectedClusters"
+
+# default of resourceProvider is Azure Arc enabled Kubernetes and this will get updated based on the provider cluster resource
+resourceProvider="Microsoft.Kubernetes/connectedClusters"
+
+# Azure Arc enabled Kubernetes cluster resource
+isArcK8sCluster=false
+
+# openshift project name for aro v4 cluster
+openshiftProjectName="azure-monitor-for-containers"
+
+# Azure Red Hat OpenShift v4 (ARO v4) cluster resource
+isAroV4Cluster=false
+
+# default global params
+clusterResourceId=""
+kubeconfigContext=""
+
+# default workspace region and code
+workspaceRegion="eastus"
+workspaceRegionCode="EUS"
+workspaceResourceGroup="DefaultResourceGroup-"$workspaceRegionCode
+
+# default workspace guid and key
+workspaceGuid=""
+workspaceKey=""
+
+# sp details for the login if provided
+servicePrincipalClientId=""
+servicePrincipalClientSecret=""
+servicePrincipalTenantId=""
+isUsingServicePrincipal=false
+
+usage() {
+  local basename=$(basename $0)
+  echo
+  echo "Upgrade Azure Monitor for containers:"
+  echo "$basename --resource-id <cluster resource id> [--client-id <clientId of service principal>] [--client-secret <client secret of service principal>] [--tenant-id <tenant id of the service principal>] [--kube-context <name of the kube context >]"
+}
+
+parse_args() {
+  # Parses the command-line options into the script's globals, validates the cluster resource id and detects the cluster type.
+  if [ $# -le 1 ]; then
+    usage
+    exit 1
+  fi
+
+  # Transform long options to short ones; an unrecognized long option is fatal (same as an unknown short option below)
+  for arg in "$@"; do
+    shift
+    case "$arg" in
+    "--resource-id") set -- "$@" "-r" ;;
+    "--kube-context") set -- "$@" "-k" ;;
+    "--client-id") set -- "$@" "-c" ;;
+    "--client-secret") set -- "$@" "-s" ;;
+    "--tenant-id") set -- "$@" "-t" ;;
+    "--"*) usage; exit 1 ;;
+    *) set -- "$@" "$arg" ;;
+    esac
+  done
+
+  local OPTIND opt
+
+  while getopts 'hk:r:c:s:t:' opt; do
+    case "$opt" in
+    h)
+      usage
+      ;;
+
+    k)
+      kubeconfigContext="$OPTARG"
+      echo "name of kube-context is $OPTARG"
+      ;;
+
+    r)
+      clusterResourceId="$OPTARG"
+      echo "clusterResourceId is $OPTARG"
+      ;;
+
+    c)
+      servicePrincipalClientId="$OPTARG"
+      echo "servicePrincipalClientId is $OPTARG"
+      ;;
+
+    s)
+      servicePrincipalClientSecret="$OPTARG"
+      echo "clientSecret is *****"
+      ;;
+
+    t)
+      servicePrincipalTenantId="$OPTARG"
+      echo "service principal tenantId is $OPTARG"
+      ;;
+
+    ?)
+      usage
+      exit 1
+      ;;
+    esac
+  done
+  shift "$(($OPTIND - 1))"
+
+  local subscriptionId="$(echo ${clusterResourceId} | cut -d'/' -f3)"
+  local resourceGroup="$(echo ${clusterResourceId} | cut -d'/' -f5)"
+
+  # get resource parts and join back to get the provider name
+  local providerNameResourcePart1="$(echo ${clusterResourceId} | cut -d'/' -f7)"
+  local providerNameResourcePart2="$(echo ${clusterResourceId} | cut -d'/' -f8)"
+  local providerName="$(echo ${providerNameResourcePart1}/${providerNameResourcePart2})"
+
+  local clusterName="$(echo ${clusterResourceId} | cut -d'/' -f9)"
+
+  # convert to lowercase for validation
+  providerName=$(echo $providerName | tr "[:upper:]" "[:lower:]")
+
+  echo "cluster SubscriptionId:" $subscriptionId
+  echo "cluster ResourceGroup:" $resourceGroup
+  echo "cluster ProviderName:" $providerName
+  echo "cluster Name:" $clusterName
+
+  if [ -z "$subscriptionId" -o -z "$resourceGroup" -o -z "$providerName" -o -z "$clusterName" ]; then
+    echo "-e invalid cluster resource id. Please try with valid fully qualified resource id of the cluster"
+    exit 1
+  fi
+
+  if [[ $providerName != microsoft.* ]]; then
+    echo "-e invalid azure cluster resource id format."
+    exit 1
+  fi
+
+  # detect the resource provider from the provider name in the cluster resource id (ARO v4 and AKS provider names are literals because this script defines no variables for them)
+  if [ $providerName = "microsoft.kubernetes/connectedclusters" ]; then
+    echo "provider cluster resource is of Azure Arc enabled Kubernetes cluster type"
+    isArcK8sCluster=true
+    resourceProvider=$arcK8sResourceProvider
+  elif [ $providerName = "microsoft.redhatopenshift/openshiftclusters" ]; then
+    echo "provider cluster resource is of AROv4 cluster type"
+    resourceProvider="Microsoft.RedHatOpenShift/OpenShiftClusters"
+    isAroV4Cluster=true
+  elif [ $providerName = "microsoft.containerservice/managedclusters" ]; then
+    echo "provider cluster resource is of AKS cluster type"
+    isAksCluster=true
+    resourceProvider="Microsoft.ContainerService/managedClusters"
+  else
+    echo "-e unsupported azure managed cluster type"
+    exit 1
+  fi
+
+  if [ -z "$kubeconfigContext" ]; then
+    echo "using or getting current kube config context since --kube-context parameter not set "
+  fi
+
+  if [ ! -z "$servicePrincipalClientId" -a ! -z "$servicePrincipalClientSecret" -a ! -z "$servicePrincipalTenantId" ]; then
+    echo "using service principal creds (clientId, secret and tenantId) for azure login since provided"
+    isUsingServicePrincipal=true
+  fi
+}
+
+configure_to_public_cloud() {
+  echo "Set AzureCloud as active cloud for az cli"
+  az cloud set -n $defaultAzureCloud
+}
+
+validate_cluster_identity() {
+  echo "validating cluster identity"
+
+  local rgName="$(echo ${1})"
+  local clusterName="$(echo ${2})"
+
+  local identitytype=$(az resource show -g ${rgName} -n ${clusterName} --resource-type $resourceProvider --query identity.type)
+  identitytype=$(echo $identitytype | tr "[:upper:]" "[:lower:]" | tr -d '"')
+  echo "cluster identity type:" $identitytype
+
+  if [[ "$identitytype" != "systemassigned" ]]; then
+    echo "-e only supported cluster identity is systemassigned for Azure Arc enabled Kubernetes cluster type"
+    exit 1
+  fi
+
+  echo "successfully validated the identity of the cluster"
+}
+
+validate_monitoring_tags() {
+  # Exits with an error unless the cluster resource carries the logAnalyticsWorkspaceResourceId tag; success is logged only after that check.
+  echo "get loganalyticsworkspaceResourceId tag on to cluster resource"
+  logAnalyticsWorkspaceResourceIdTag=$(az resource show --query tags.logAnalyticsWorkspaceResourceId -g $clusterResourceGroup -n $clusterName --resource-type $resourceProvider)
+  if [ -z "$logAnalyticsWorkspaceResourceIdTag" ]; then
+    echo "-e logAnalyticsWorkspaceResourceId doesnt exist on this cluster which indicates cluster not enabled for monitoring"
+    exit 1
+  fi
+  echo "configured log analytics workspace: ${logAnalyticsWorkspaceResourceIdTag}"
+  echo "successfully got logAnalyticsWorkspaceResourceId tag on the cluster resource"
+}
+
+upgrade_helm_chart_release() {
+  # Upgrades release $releaseName to chart ${mcrChartVersion}, re-applying the release's current values; --kube-context (when set) is passed to both helm commands.
+  # get the config-context for ARO v4 cluster
+  if [ "$isAroV4Cluster" = true ]; then
+    echo "getting config-context of ARO v4 cluster "
+    echo "getting admin user creds for aro v4 cluster"
+    adminUserName=$(az aro list-credentials -g $clusterResourceGroup -n $clusterName --query 'kubeadminUsername' -o tsv)
+    adminPassword=$(az aro list-credentials -g $clusterResourceGroup -n $clusterName --query 'kubeadminPassword' -o tsv)
+    apiServer=$(az aro show -g $clusterResourceGroup -n $clusterName --query apiserverProfile.url -o tsv)
+    echo "login to the cluster via oc login"
+    oc login $apiServer -u $adminUserName -p $adminPassword
+    echo "creating project azure-monitor-for-containers"
+    oc new-project $openshiftProjectName
+    echo "getting config-context of aro v4 cluster"
+    kubeconfigContext=$(oc config current-context)
+  fi
+
+  if [ -z "$kubeconfigContext" ]; then
+    echo "installing Azure Monitor for containers HELM chart on to the cluster and using current kube context ..."
+  else
+    echo "installing Azure Monitor for containers HELM chart on to the cluster with kubecontext:${kubeconfigContext} ..."
+  fi
+
+  export HELM_EXPERIMENTAL_OCI=1
+
+  echo "pull the chart from ${mcr}/${mcrChartRepoPath}:${mcrChartVersion}"
+  helm chart pull ${mcr}/${mcrChartRepoPath}:${mcrChartVersion}
+
+  echo "export the chart from local cache to current directory"
+  helm chart export ${mcr}/${mcrChartRepoPath}:${mcrChartVersion} --destination .
+
+  helmChartRepoPath=$helmLocalRepoName/$helmChartName
+
+  echo "upgrading the release: $releaseName to chart version : ${mcrChartVersion}"
+  helm get values $releaseName ${kubeconfigContext:+--kube-context "$kubeconfigContext"} -o yaml | helm upgrade --install $releaseName $helmChartRepoPath ${kubeconfigContext:+--kube-context "$kubeconfigContext"} -f -
+  echo "$releaseName got upgraded successfully."
+}
+
+login_to_azure() {
+  if [ "$isUsingServicePrincipal" = true ]; then
+    echo "login to the azure using provided service principal creds"
+    az login --service-principal --username $servicePrincipalClientId --password $servicePrincipalClientSecret --tenant $servicePrincipalTenantId
+  else
+    echo "login to the azure interactively"
+    az login --use-device-code
+  fi
+}
+
+set_azure_subscription() {
+  local subscriptionId="$(echo ${1})"
+  echo "setting the subscription id: ${subscriptionId} as current subscription for the azure cli"
+  az account set -s ${subscriptionId}
+  echo "successfully configured subscription id: ${subscriptionId} as current subscription for the azure cli"
+}
+
+# parse and validate args
+parse_args $@
+
+# configure azure cli for public cloud
+configure_to_public_cloud
+
+# parse cluster resource id
+clusterSubscriptionId="$(echo $clusterResourceId | cut -d'/' -f3 | tr "[:upper:]" "[:lower:]")"
+clusterResourceGroup="$(echo $clusterResourceId | cut -d'/' -f5)"
+providerName="$(echo $clusterResourceId | cut -d'/' -f7)"
+clusterName="$(echo $clusterResourceId | cut -d'/' -f9)"
+
+# login to azure
+login_to_azure
+
+# set the cluster subscription id as active sub for azure cli
+set_azure_subscription $clusterSubscriptionId
+
+# validate cluster identity if its Azure Arc enabled Kubernetes cluster
+if [ "$isArcK8sCluster" = true ]; then
+  validate_cluster_identity $clusterResourceGroup $clusterName
+fi
+
+# validate the cluster has monitoring tags
+validate_monitoring_tags
+
+# upgrade helm chart release
+upgrade_helm_chart_release
+
+# portal link
+echo "Proceed to https://aka.ms/azmon-containers to view health of your newly onboarded cluster"

From e6dad8354e38efc1fdd9eafbb269aa9d9e26fefd Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 1 Oct 2020 14:08:31 -0700
Subject: [PATCH 30/60] Install CA certs from wireserver (#451)

---
 kubernetes/windows/main.ps1 | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1
index de82722ad..2e8659601 100644
--- a/kubernetes/windows/main.ps1
+++ b/kubernetes/windows/main.ps1
@@ -263,6 +263,27 @@ function Generate-Certificates {
     C:\\opt\\omsagentwindows\\certgenerator\\certificategenerator.exe
 }
 
+function Bootstrap-CACertificates {
+    try {
+        # This is required when the root CA certs are different for some clouds.
+        $caCerts=Invoke-WebRequest 'http://168.63.129.16/machine?comp=acmspackage&type=cacertificates&ext=json' -UseBasicParsing | ConvertFrom-Json
+        if (![string]::IsNullOrEmpty($caCerts)) {
+            $certificates = $caCerts.Certificates
+            for ($index = 0; $index -lt $certificates.Length ; $index++) {
+                $name=$certificates[$index].Name
+                $certificates[$index].CertBody > $name
+                Write-Host "name: $($name)"
+                Import-Certificate -FilePath .\$name  -CertStoreLocation 'Cert:\LocalMachine\Root' -Verbose
+            }
+        }
+    }
+    catch {
+        $e = $_.Exception
+        Write-Host $e
+        Write-Host "exception occured in Bootstrap-CACertificates..."
+    }
+}
+
 function Test-CertificatePath {
     $certLocation = $env:CI_CERT_LOCATION
     $keyLocation = $env:CI_KEY_LOCATION
@@ -288,6 +309,14 @@ Start-Transcript -Path main.txt
 Remove-WindowsServiceIfItExists "fluentdwinaks"
 Set-EnvironmentVariables
 Start-FileSystemWatcher
+
+#Bootstrapping CA certs for non public clouds and AKS clusters
+$aksResourceId = [System.Environment]::GetEnvironmentVariable("AKS_RESOURCE_ID")
+if (![string]::IsNullOrEmpty($aksResourceId) -and $aksResourceId.ToLower().Contains("/microsoft.containerservice/managedclusters/"))
+{
+    Bootstrap-CACertificates
+}
+
 Generate-Certificates
 Test-CertificatePath
 Start-Fluent

From 23397edf3764870dde9d7f4eef10f0842ae5adc6 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Thu, 1 Oct 2020 16:14:49 -0700
Subject: [PATCH 31/60] grwehner/pv-volume-name-in-mdm (#452)

Add volume name for PV to mdm dimensions and zero fill it
---
 source/plugins/ruby/MdmAlertTemplates.rb   | 2 ++
 source/plugins/ruby/MdmMetricsGenerator.rb | 3 +++
 source/plugins/ruby/constants.rb           | 1 +
 3 files changed, 6 insertions(+)

diff --git a/source/plugins/ruby/MdmAlertTemplates.rb b/source/plugins/ruby/MdmAlertTemplates.rb
index d5107fea1..ef63cf219 100644
--- a/source/plugins/ruby/MdmAlertTemplates.rb
+++ b/source/plugins/ruby/MdmAlertTemplates.rb
@@ -101,6 +101,7 @@ class MdmAlertTemplates
                     "podName",
                     "node",
                     "kubernetesNamespace",
+                    "volumeName",
                     "thresholdPercentage"
                 ],
                 "series": [
@@ -109,6 +110,7 @@ class MdmAlertTemplates
                         "%{podNameDimValue}",
                         "%{computerNameDimValue}",
                         "%{namespaceDimValue}",
+                        "%{volumeNameDimValue}",
                         "%{thresholdPercentageDimValue}"
                     ],
                     "min": %{pvResourceUtilizationPercentage},
diff --git a/source/plugins/ruby/MdmMetricsGenerator.rb b/source/plugins/ruby/MdmMetricsGenerator.rb
index b8104212d..12d462e44 100644
--- a/source/plugins/ruby/MdmMetricsGenerator.rb
+++ b/source/plugins/ruby/MdmMetricsGenerator.rb
@@ -186,6 +186,7 @@ def zeroFillMetricRecords(records, batch_time)
         pvZeroFillDims = {}
         pvZeroFillDims[Constants::INSIGHTSMETRICS_TAGS_PVC_NAMESPACE] = Constants::KUBESYSTEM_NAMESPACE_ZERO_FILL
         pvZeroFillDims[Constants::INSIGHTSMETRICS_TAGS_POD_NAME] = Constants::OMSAGENT_ZERO_FILL
+        pvZeroFillDims[Constants::INSIGHTSMETRICS_TAGS_VOLUME_NAME] = Constants::VOLUME_NAME_ZERO_FILL
         pvResourceUtilMetricRecord = getPVResourceUtilMetricRecords(batch_time,
                                                                     Constants::PV_USED_BYTES,
                                                                     @@hostName,
@@ -289,6 +290,7 @@ def getPVResourceUtilMetricRecords(recordTimeStamp, metricName, computer, percen
         pvcNamespace = dims[Constants::INSIGHTSMETRICS_TAGS_PVC_NAMESPACE]
         podName = dims[Constants::INSIGHTSMETRICS_TAGS_POD_NAME]
         podUid = dims[Constants::INSIGHTSMETRICS_TAGS_POD_UID]
+        volumeName = dims[Constants::INSIGHTSMETRICS_TAGS_VOLUME_NAME]
 
         resourceUtilRecord = MdmAlertTemplates::PV_resource_utilization_template % {
           timestamp: recordTimeStamp,
@@ -296,6 +298,7 @@ def getPVResourceUtilMetricRecords(recordTimeStamp, metricName, computer, percen
           podNameDimValue: podName,
           computerNameDimValue: computer,
           namespaceDimValue: pvcNamespace,
+          volumeNameDimValue: volumeName,
           pvResourceUtilizationPercentage: percentageMetricValue,
           thresholdPercentageDimValue: thresholdPercentage,
         }
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index be1a9de64..35e5f9334 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -76,6 +76,7 @@ class Constants
   TELEGRAF_DISK_METRICS = "container.azm.ms/disk"
   OMSAGENT_ZERO_FILL = "omsagent"
   KUBESYSTEM_NAMESPACE_ZERO_FILL = "kube-system"
+  VOLUME_NAME_ZERO_FILL = "-"
 
   #Telemetry constants
   CONTAINER_METRICS_HEART_BEAT_EVENT = "ContainerMetricsMdmHeartBeatEvent"

From 7562a96696cb4882f8387ba405b8a0f0145b00ad Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Mon, 5 Oct 2020 13:57:01 -0700
Subject: [PATCH 32/60] Release changes for 10052020 release (#453)

* Release changes for 10052020 release

* remove redundant kubelet metrics as part of PR feedback
---
 ReleaseNotes.md                            | 18 ++++++++++++++++++
 build/version                              |  6 +++---
 charts/azuremonitor-containers/Chart.yaml  |  2 +-
 charts/azuremonitor-containers/values.yaml |  6 +++---
 kubernetes/linux/Dockerfile                |  2 +-
 kubernetes/omsagent.yaml                   | 12 ++++++------
 kubernetes/windows/Dockerfile              |  2 +-
 7 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 499c99f02..e1892d083 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -11,6 +11,24 @@ additional questions or comments.
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
 
+### 10/05/2020 -
+##### Version microsoft/oms:ciprod10052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10052020 (linux)
+##### Version microsoft/oms:win-ciprod10052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10052020 (windows)
+##### Code change log
+- Health CRD to version v1 (from v1beta1) for k8s versions >= 1.19.0
+- Collection of PV usage metrics for PVs mounted by pods (kube-system pods excluded by default)(doc-link-needed)
+- Zero fill few custom metrics under a timer, also add zero filling for new PV usage metrics
+- Collection of additional Kubelet metrics ('kubelet_running_pod_count','volume_manager_total_volumes','kubelet_node_config_error','process_resident_memory_bytes','process_cpu_seconds_total','kubelet_runtime_operations_total','kubelet_runtime_operations_errors_total'). This also includes updates to 'kubelet' workbook to include these new metrics
+- Collection of Azure NPM (Network Policy Manager) metrics (basic & advanced. By default, NPM metrics collection is turned OFF)(doc-link-needed)
+- Support log collection when docker root is changed with knode. Tracked by [this](https://github.com/Azure/AKS/issues/1373) issue
+- Support for Pods in 'Terminating' state for nodelost scenarios
+- Fix for reduction in telemetry for custom metrics ingestion failures
+- Fix CPU capacity/limits metrics being 0 for Virtual nodes (VK)
+- Add new custom metric regions (eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth)
+- Enable strict SSL validation for AppInsights Ruby SDK
+- Turn off custom metrics upload for unsupported cluster types
+- Install CA certs from wire server for windows (in certain clouds)
+
 ### 09/16/2020 -
 > Note: This agent release targetted ONLY for non-AKS clusters via Azure Monitor for containers HELM chart update
 ##### Version microsoft/oms:ciprod09162020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod09162020 (linux)
diff --git a/build/version b/build/version
index b53b0dcfb..9587328de 100644
--- a/build/version
+++ b/build/version
@@ -3,10 +3,10 @@
 # Build Version Information
 
 CONTAINER_BUILDVERSION_MAJOR=10
-CONTAINER_BUILDVERSION_MINOR=0
+CONTAINER_BUILDVERSION_MINOR=1
 CONTAINER_BUILDVERSION_PATCH=0
-CONTAINER_BUILDVERSION_BUILDNR=5
-CONTAINER_BUILDVERSION_DATE=20200916
+CONTAINER_BUILDVERSION_BUILDNR=0
+CONTAINER_BUILDVERSION_DATE=20201005
 CONTAINER_BUILDVERSION_STATUS=Developer_Build
 
 #-------------------------------- End of File -----------------------------------
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index 1d3fed86f..6d45b05d8 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 appVersion: 7.0.0-1
 description: Helm chart for deploying Azure Monitor container monitoring agent in Kubernetes
 name: azuremonitor-containers
-version: 2.7.6
+version: 2.7.7
 kubeVersion: "^1.10.0-0"
 keywords:
   - monitoring
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 2711cb372..f841dc5d7 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -12,10 +12,10 @@ Azure:
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod09252020"
-    tagWindows: "win-ciprod09252020"
+    tag: "ciprod10052020"
+    tagWindows: "win-ciprod10052020"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "10.0.0-6"
+    dockerProviderVersion: "10.1.0-0"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index ee35cd556..f4324a18a 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod09162020
+ARG IMAGE_TAG=ciprod10052020
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index e8352e020..18bc203d4 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -337,13 +337,13 @@ spec:
         tier: node
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-5"
+        dockerProviderVersion: "10.1.0-0"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod09162020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10052020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -494,13 +494,13 @@ spec:
         rsName: "omsagent-rs"
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-5"
+        dockerProviderVersion: "10.1.0-0"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod09162020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10052020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -640,13 +640,13 @@ spec:
         tier: node-win
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.0.0-5"
+        dockerProviderVersion: "10.1.0-0"
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod09162020"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10052020"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index ca89d1c80..c7dee60af 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod09162020
+ARG IMAGE_TAG=win-ciprod10052020
 
 # Do not split this into multiple RUN!
 # Docker creates a layer for every RUN-Statement

From 4b47f44491a77d7321cbbba6e5d2941326b06159 Mon Sep 17 00:00:00 2001
From: saaror <31900410+saaror@users.noreply.github.com>
Date: Mon, 12 Oct 2020 16:49:16 -0700
Subject: [PATCH 33/60] Update onboarding_instructions.md (#456)

* Update onboarding_instructions.md

Updated the documentation to reflect where to update the config map.

* Update onboarding_instructions.md

* Update onboarding_instructions.md

* Update onboarding_instructions.md

Updated the link
---
 Health/onboarding_instructions.md | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/Health/onboarding_instructions.md b/Health/onboarding_instructions.md
index 9c07b2167..4c83577b5 100644
--- a/Health/onboarding_instructions.md
+++ b/Health/onboarding_instructions.md
@@ -6,12 +6,28 @@ For on-boarding to Health(Tab), you would need to complete two steps
 
 
 ## Configure agent through ConfigMap
-1. Include the following section in ConfigMap yaml file
-```cmd:agent-settings: |-
-    [agent_settings.health_model]
+1. If you are configuring your existing ConfigMap, append the following section in your existing ConfigMap yaml file
+```
+#Append this section in your existing configmap
+agent-settings: |-
+       # agent health model feature settings   
+    [agent_settings.health_model]   
+      # In the absence of this configmap, default value for enabled is false   
+      enabled = true
+```
+2. Otherwise, if you don't have a ConfigMap yet, download the new ConfigMap from [here](https://github.com/microsoft/Docker-Provider/blob/ci_prod/kubernetes/container-azm-ms-agentconfig.yaml) and then set `enabled = true`
+      
+```
+#For the newly downloaded configmap, change this default setting to true
+agent-settings: |-
+       # agent health model feature settings   
+    [agent_settings.health_model]   
+      # In the absence of this configmap, default value for enabled is false   
       enabled = true
 ```
-2. Run the following kubectl command:
+
+
+3. Run the following kubectl command:
    `kubectl apply -f <configmap_yaml_file.yaml>`
    
 Example: `kubectl apply -f container-azm-ms-agentconfig.yaml`.

From 3f86b23523da9082e1a36faec00af992994622cb Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 19 Oct 2020 12:42:22 -0700
Subject: [PATCH 34/60] chart update for sept2020 release (#457)

---
 scripts/onboarding/managed/enable-monitoring.ps1 | 2 +-
 scripts/onboarding/managed/enable-monitoring.sh  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index 1e1669400..4815dc958 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -60,7 +60,7 @@ $isUsingServicePrincipal = $false
 
 # released chart version in mcr
 $mcr = "mcr.microsoft.com"
-$mcrChartVersion = "2.7.6"
+$mcrChartVersion = "2.7.7"
 $mcrChartRepoPath = "azuremonitor/containerinsights/preview/azuremonitor-containers"
 $helmLocalRepoName = "."
 
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index ce62a581a..d7edf49dc 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -42,7 +42,7 @@ set -o pipefail
 defaultAzureCloud="AzureCloud"
 
 # released chart version in mcr
-mcrChartVersion="2.7.6"
+mcrChartVersion="2.7.7"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 helmLocalRepoName="."

From 6203c3a0dd3a1deafd39aaa18e08968f01f45ab8 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 19 Oct 2020 16:58:12 -0700
Subject: [PATCH 35/60] add missing version update in the script (#458)

---
 scripts/onboarding/managed/upgrade-monitoring.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/onboarding/managed/upgrade-monitoring.sh b/scripts/onboarding/managed/upgrade-monitoring.sh
index 8a12b2f02..23594c7bc 100644
--- a/scripts/onboarding/managed/upgrade-monitoring.sh
+++ b/scripts/onboarding/managed/upgrade-monitoring.sh
@@ -20,7 +20,7 @@ set -e
 set -o pipefail
 
 # released chart version for Azure Arc enabled Kubernetes public preview
-mcrChartVersion="2.7.6"
+mcrChartVersion="2.7.7"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 

From 5b154691ba558c1257e15879b3a6f34655a3fc45 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Tue, 27 Oct 2020 12:54:03 -0700
Subject: [PATCH 36/60] November release fixes - activate one agent, adx schema
 v2, win perf issue, syslog deactivation (#459)

* activate one agent, adx schema v2, win perf issue, syslog deactivation

* update chart
---
 .../linux/installer/scripts/livenessprobe.sh  |  14 +-
 .../templates/omsagent-daemonset-windows.yaml |   3 +-
 .../templates/omsagent-daemonset.yaml         |   2 +-
 charts/azuremonitor-containers/values.yaml    |   6 +-
 kubernetes/linux/main.sh                      | 123 ++++++++++++++----
 kubernetes/linux/setup.sh                     |   4 +-
 kubernetes/omsagent.yaml                      |  10 +-
 source/plugins/go/src/oms.go                  |  62 +++++----
 source/plugins/go/src/utils.go                |   2 +-
 .../plugins/ruby/CAdvisorMetricsAPIClient.rb  |   4 +-
 source/plugins/ruby/constants.rb              |   2 +
 11 files changed, 165 insertions(+), 67 deletions(-)

diff --git a/build/linux/installer/scripts/livenessprobe.sh b/build/linux/installer/scripts/livenessprobe.sh
index 87f68a560..e3f9fb475 100644
--- a/build/linux/installer/scripts/livenessprobe.sh
+++ b/build/linux/installer/scripts/livenessprobe.sh
@@ -4,15 +4,25 @@
 (ps -ef | grep omsagent- | grep -v "grep")
 if [ $? -ne 0 ]
 then
- echo "Agent is NOT running" > /dev/termination-log
+ echo " omsagent is not running" > /dev/termination-log
  exit 1
 fi
 
+#optionally test to exit non zero value if oneagent is not running
+if [ -e "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2" ]; then
+  (ps -ef | grep "mdsd -l" | grep -v "grep")
+  if [ $? -ne 0 ]
+  then
+   echo "oneagent is not running" > /dev/termination-log
+   exit 1
+  fi
+fi
+
 #test to exit non zero value if fluentbit is not running
 (ps -ef | grep td-agent-bit | grep -v "grep")
 if [ $? -ne 0 ]
 then
- echo "Fluentbit is NOT running" > /dev/termination-log
+ echo "Fluentbit is not running" > /dev/termination-log
  exit 1
 fi
 
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index e65f9a98d..c916fadf6 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -46,7 +46,7 @@ spec:
        {{- end }}
        imagePullPolicy: IfNotPresent
        resources:
-{{ toYaml .Values.omsagent.resources.daemonset | indent 9 }}
+{{ toYaml .Values.omsagent.resources.daemonset-windows | indent 9 }}
        env:
        {{- if ne .Values.omsagent.env.clusterId "<your_cluster_id>" }}
        - name: AKS_RESOURCE_ID
@@ -96,6 +96,7 @@ spec:
               - C:\opt\omsagentwindows\scripts\cmd\livenessProbe.cmd
           periodSeconds: 60
           initialDelaySeconds: 180
+          timeoutSeconds: 15
    {{- with .Values.omsagent.tolerations }}
    tolerations: {{- toYaml . | nindent 8 }}
    {{- end }}
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
index 438294ce5..8af13b6ee 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
@@ -40,7 +40,7 @@ spec:
        {{- end }}
        imagePullPolicy: IfNotPresent
        resources:
-{{ toYaml .Values.omsagent.resources.daemonset | indent 9 }}
+{{ toYaml .Values.omsagent.resources.daemonset-linux | indent 9 }}
        env:
        {{- if ne .Values.omsagent.env.clusterId "<your_cluster_id>" }}
        - name: AKS_RESOURCE_ID
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index f841dc5d7..fa01c05bd 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -116,13 +116,17 @@ omsagent:
   ## ref: http://kubernetes.io/docs/user-guide/compute-resources/
   ##
   resources:
-    daemonset:
+    daemonset-linux:
       requests:
         cpu: 75m
         memory: 225Mi
       limits:
         cpu: 150m
         memory: 600Mi
+    daemonset-windows:
+      limits:
+        cpu: 200m
+        memory: 600Mi
     deployment:
       requests:
         cpu: 150m
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index 11972f0f4..b093eb74b 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -416,6 +416,97 @@ echo "DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION"
 export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION
 echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc
 
+#region check to auto-activate oneagent, to route container logs,
+#Intent is to activate oneagent routing for all managed clusters with region in the region list, unless overridden by configmap
+# AZMON_CONTAINER_LOGS_ROUTE  will have route (if any) specified in the config map 
+# AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE will have the final route that we compute & set, based on our region list logic
+echo "************start oneagent log routing checks************"
+# by default, use configmap route for safer side
+AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE
+
+#trim region list
+oneagentregions="$(echo $AZMON_CONTAINERLOGS_ONEAGENT_REGIONS | xargs)"
+#lowercase region list
+typeset -l oneagentregions=$oneagentregions
+echo "oneagent regions: $oneagentregions"
+#trim current region
+currentregion="$(echo $AKS_REGION | xargs)"
+#lowercase current region
+typeset -l currentregion=$currentregion
+echo "current region: $currentregion"
+
+#initialize isoneagentregion as false
+isoneagentregion=false
+
+#set isoneagentregion as true if matching region is found
+if [ ! -z $oneagentregions ] && [ ! -z $currentregion ]; then
+  for rgn in $(echo $oneagentregions | sed "s/,/ /g"); do
+    if [ "$rgn" == "$currentregion" ]; then
+          isoneagentregion=true
+          echo "current region is in oneagent regions..."
+          break
+    fi
+  done
+else
+  echo "current region is not in oneagent regions..."
+fi
+
+if [ "$isoneagentregion" = true ]; then 
+   #if configmap has a routing for logs, but current region is in the oneagent region list, take the configmap route
+   if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then   
+      AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE
+      echo "oneagent region is true for current region:$currentregion and config map logs route is not empty. so using config map logs route as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
+   else #there is no configmap route, so route thru oneagent
+      AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE="v2"
+      echo "oneagent region is true for current region:$currentregion and config map logs route is empty. so using oneagent as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
+   fi
+else
+   echo "oneagent region is false for current region:$currentregion"
+fi
+
+
+#start oneagent
+if [ ! -e "/etc/config/kube.conf" ]; then
+   if [ ! -z $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE ]; then
+      echo "container logs configmap route is $AZMON_CONTAINER_LOGS_ROUTE"
+      echo "container logs effective route is $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
+      #trim
+      containerlogsroute="$(echo $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE | xargs)"
+      # convert to lowercase
+      typeset -l containerlogsroute=$containerlogsroute
+
+      echo "setting AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE as :$containerlogsroute"
+      export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute
+      echo "export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute" >> ~/.bashrc
+      source ~/.bashrc
+
+      if [ "$containerlogsroute" == "v2" ]; then
+            echo "activating oneagent..."
+            echo "configuring mdsd..."
+            cat /etc/mdsd.d/envmdsd | while read line; do
+                  echo $line >> ~/.bashrc
+            done
+            source /etc/mdsd.d/envmdsd
+
+            echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id"
+            export CIWORKSPACE_id=$CIWORKSPACE_id
+            echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc
+            export CIWORKSPACE_key=$CIWORKSPACE_key
+            echo "export CIWORKSPACE_key=$CIWORKSPACE_key" >> ~/.bashrc
+
+            source ~/.bashrc
+
+            dpkg -l | grep mdsd | awk '{print $2 " " $3}'
+
+            echo "starting mdsd ..."
+            mdsd -l -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
+            
+            touch /opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2
+      fi
+   fi
+fi
+echo "************end oneagent log routing checks************"
+
 #telegraf & fluentbit requirements
 if [ ! -e "/etc/config/kube.conf" ]; then
       if [ "$CONTAINER_RUNTIME" == "docker" ]; then
@@ -491,37 +582,13 @@ dpkg -l | grep td-agent-bit | awk '{print $2 " " $3}'
 
 #dpkg -l | grep telegraf | awk '{print $2 " " $3}'
 
-#start oneagent
-if [ ! -e "/etc/config/kube.conf" ]; then
-   if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then
-      echo "container logs route is defined as $AZMON_CONTAINER_LOGS_ROUTE"
-      #trim
-      containerlogsroute="$(echo $AZMON_CONTAINER_LOGS_ROUTE | xargs)"
-      # convert to lowercase
-      typeset -l containerlogsroute=$containerlogsroute
-      if [ "$containerlogsroute" == "v2" ]; then
-            echo "containerlogsroute $containerlogsroute"
-            echo "configuring mdsd..."
-            cat /etc/mdsd.d/envmdsd | while read line; do
-                  echo $line >> ~/.bashrc
-            done
-            source /etc/mdsd.d/envmdsd
 
-            echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id"
-            export CIWORKSPACE_id=$CIWORKSPACE_id
-            echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc
-            export CIWORKSPACE_key=$CIWORKSPACE_key
-            echo "export CIWORKSPACE_key=$CIWORKSPACE_key" >> ~/.bashrc
 
-            source ~/.bashrc
+echo "stopping rsyslog..."
+service rsyslog stop
 
-            dpkg -l | grep mdsd | awk '{print $2 " " $3}'
-
-            echo "starting mdsd ..."
-            mdsd -l -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
-      fi
-   fi
-fi
+echo "getting rsyslog status..."
+service rsyslog status
 
 shutdown() {
 	/opt/microsoft/omsagent/bin/service_control stop
diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh
index 67a981dfa..fb41d4782 100644
--- a/kubernetes/linux/setup.sh
+++ b/kubernetes/linux/setup.sh
@@ -31,8 +31,8 @@ mv $TMPDIR/omsbundle* $TMPDIR/omsbundle
 /usr/bin/dpkg -i $TMPDIR/omsbundle/110/omsagent*.deb
 #/usr/bin/dpkg -i $TMPDIR/omsbundle/100/omsconfig*.deb
 
-#install oneagent - Latest dev bits (7/17)
-wget https://github.com/microsoft/Docker-Provider/releases/download/7172020-oneagent/azure-mdsd_1.5.124-build.develop.1294_x86_64.deb
+#install oneagent - Official bits (10/18)
+wget https://github.com/microsoft/Docker-Provider/releases/download/10182020-oneagent/azure-mdsd_1.5.126-build.master.99_x86_64.deb
 /usr/bin/dpkg -i $TMPDIR/azure-mdsd*.deb
 cp -f $TMPDIR/mdsd.xml /etc/mdsd.d
 cp -f $TMPDIR/envmdsd /etc/mdsd.d
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 18bc203d4..61f89b808 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -347,7 +347,7 @@ spec:
           imagePullPolicy: IfNotPresent
           resources:
             limits:
-              cpu: 150m
+              cpu: 250m
               memory: 600Mi
             requests:
               cpu: 75m
@@ -370,6 +370,8 @@ spec:
             # Update this with the user assigned msi client id for omsagent
             - name: USER_ASSIGNED_IDENTITY_CLIENT_ID
               value: ""
+            - name: AZMON_CONTAINERLOGS_ONEAGENT_REGIONS
+              value: "koreacentral,norwayeast"
           securityContext:
             privileged: true
           ports:
@@ -650,11 +652,8 @@ spec:
          imagePullPolicy: IfNotPresent
          resources:
           limits:
-           cpu: 150m
+           cpu: 200m
            memory: 600Mi
-          requests:
-           cpu: 75m
-           memory: 225Mi
          env:
           # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these
           - name: AKS_RESOURCE_ID
@@ -696,6 +695,7 @@ spec:
               - C:\opt\omsagentwindows\scripts\cmd\livenessProbe.cmd
           periodSeconds: 60
           initialDelaySeconds: 180
+          timeoutSeconds: 15
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/source/plugins/go/src/oms.go b/source/plugins/go/src/oms.go
index 63ca6de10..5a678781c 100644
--- a/source/plugins/go/src/oms.go
+++ b/source/plugins/go/src/oms.go
@@ -194,15 +194,15 @@ type DataItem struct {
 }
 
 type DataItemADX struct {
-	LogEntry              string `json:"LogEntry"`
-	LogEntrySource        string `json:"LogEntrySource"`
-	LogEntryTimeStamp     string `json:"LogEntryTimeStamp"`
-	LogEntryTimeOfCommand string `json:"TimeOfCommand"`
-	ID                    string `json:"Id"`
-	Image                 string `json:"Image"`
-	Name                  string `json:"Name"`
-	SourceSystem          string `json:"SourceSystem"`
+	TimeGenerated         string `json:"TimeGenerated"`
 	Computer              string `json:"Computer"`
+	ContainerID           string `json:"ContainerID"`
+	ContainerName         string `json:"ContainerName"`
+	PodName				  string `json:"PodName"`
+	PodNamespace          string `json:"PodNamespace"`
+	LogMessage            string `json:"LogMessage"`
+	LogSource             string `json:"LogSource"`
+	//PodLabels			  string `json:"PodLabels"`
 	AzureResourceId       string `json:"AzureResourceId"`
 }
 
@@ -422,7 +422,7 @@ func convert(in interface{}) (float64, bool) {
 func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType KubeMonAgentEventType) {
 	var logRecordString = ToString(record["log"])
 	var eventTimeStamp = ToString(record["time"])
-	containerID, _, podName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"]))
+	containerID, _, podName, _ := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"]))
 
 	Log("Locked EventHashUpdateMutex for updating hash \n ")
 	EventHashUpdateMutex.Lock()
@@ -816,7 +816,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 	DataUpdateMutex.Unlock()
 
 	for _, record := range tailPluginRecords {
-		containerID, k8sNamespace, _ := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"]))
+		containerID, k8sNamespace, k8sPodName, containerName := GetContainerIDK8sNamespacePodNameFromFileName(ToString(record["filepath"]))
 		logEntrySource := ToString(record["stream"])
 
 		if strings.EqualFold(logEntrySource, "stdout") {
@@ -867,16 +867,18 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 			if ResourceCentric == true {
 				stringMap["AzureResourceId"] = ResourceID
 			}
+			stringMap["PodName"] = k8sPodName
+			stringMap["PodNamespace"] = k8sNamespace
+			stringMap["ContainerName"] = containerName
 			dataItemADX = DataItemADX{
-				ID:                    stringMap["Id"],
-				LogEntry:              stringMap["LogEntry"],
-				LogEntrySource:        stringMap["LogEntrySource"],
-				LogEntryTimeStamp:     stringMap["LogEntryTimeStamp"],
-				LogEntryTimeOfCommand: stringMap["TimeOfCommand"],
-				SourceSystem:          stringMap["SourceSystem"],
+				TimeGenerated:         stringMap["LogEntryTimeStamp"],
 				Computer:              stringMap["Computer"],
-				Image:                 stringMap["Image"],
-				Name:                  stringMap["Name"],
+				ContainerID:           stringMap["Id"],
+				ContainerName:         stringMap["ContainerName"],
+				PodName:               stringMap["PodName"],
+				PodNamespace:          stringMap["PodNamespace"],
+				LogMessage:            stringMap["LogEntry"],
+				LogSource:             stringMap["LogEntrySource"],
 				AzureResourceId:       stringMap["AzureResourceId"],
 			}
 			//ADX
@@ -1018,7 +1020,7 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int {
 		//ADXFlushMutex.Lock()
 		//defer ADXFlushMutex.Unlock()
 		//MultiJSON support is not there yet
-		if ingestionErr := ADXIngestor.FromReader(ctx, r, ingest.IngestionMappingRef("ContainerLogMapping", ingest.JSON), ingest.FileFormat(ingest.JSON), ingest.FlushImmediately()); ingestionErr != nil {
+		if ingestionErr := ADXIngestor.FromReader(ctx, r, ingest.IngestionMappingRef("ContainerLogv2Mapping", ingest.JSON), ingest.FileFormat(ingest.JSON)); ingestionErr != nil {
 			Log("Error when streaming to ADX Ingestion: %s", ingestionErr.Error())
 			//ADXIngestor = nil  //not required as per ADX team. Will keep it to indicate that we tried this approach
 
@@ -1107,12 +1109,13 @@ func containsKey(currentMap map[string]bool, key string) bool {
 	return c
 }
 
-// GetContainerIDK8sNamespacePodNameFromFileName Gets the container ID, k8s namespace and pod name From the file Name
+// GetContainerIDK8sNamespacePodNameFromFileName Gets the container ID, k8s namespace, pod name and containername From the file Name
 // sample filename kube-proxy-dgcx7_kube-system_kube-proxy-8df7e49e9028b60b5b0d0547f409c455a9567946cf763267b7e6fa053ab8c182.log
-func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, string, string) {
+func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, string, string, string) {
 	id := ""
 	ns := ""
 	podName := ""
+	containerName := ""
 
 	start := strings.LastIndex(filename, "-")
 	end := strings.LastIndex(filename, ".")
@@ -1132,6 +1135,15 @@ func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, str
 		ns = filename[start+1 : end]
 	}
 
+	start = strings.LastIndex(filename, "_")
+	end = strings.LastIndex(filename, "-")
+
+	if start >= end || start == -1 || end == -1 {
+		containerName = ""
+	} else {
+		containerName = filename[start+1 : end]
+	}
+
 	start = strings.Index(filename, "/containers/")
 	end = strings.Index(filename, "_")
 
@@ -1141,7 +1153,7 @@ func GetContainerIDK8sNamespacePodNameFromFileName(filename string) (string, str
 		podName = filename[(start + len("/containers/")):end]
 	}
 
-	return id, ns, podName
+	return id, ns, podName, containerName
 }
 
 // InitializePlugin reads and populates plugin configuration
@@ -1313,8 +1325,8 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 
 	CreateHTTPClient()
 
-	ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_ROUTE")))
-	Log("AZMON_CONTAINER_LOGS_ROUTE:%s", ContainerLogsRoute)
+	ContainerLogsRoute := strings.TrimSpace(strings.ToLower(os.Getenv("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE")))
+	Log("AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE:%s", ContainerLogsRoute)
 
 	ContainerLogsRouteV2 = false  //default is ODS
 	ContainerLogsRouteADX = false //default is LA
@@ -1365,7 +1377,7 @@ func InitializePlugin(pluginConfPath string, agentVersion string) {
 	if strings.Compare(strings.ToLower(os.Getenv("CONTROLLER_TYPE")), "daemonset") == 0 {
 		populateExcludedStdoutNamespaces()
 		populateExcludedStderrNamespaces()
-		if enrichContainerLogs == true {
+		if enrichContainerLogs == true && ContainerLogsRouteADX != true {
 			Log("ContainerLogEnrichment=true; starting goroutine to update containerimagenamemaps \n")
 			go updateContainerImageNameMaps()
 		} else {
diff --git a/source/plugins/go/src/utils.go b/source/plugins/go/src/utils.go
index 8b1a3df65..91791ae1a 100644
--- a/source/plugins/go/src/utils.go
+++ b/source/plugins/go/src/utils.go
@@ -145,7 +145,7 @@ func CreateADXClient() {
 		//log.Fatalf("Unable to create ADX connection %s", err.Error())
 	} else {
 		Log("Successfully created ADX Client. Creating Ingestor...")
-		ingestor, ingestorErr := ingest.New(client, "containerinsights", "ContainerLog")
+		ingestor, ingestorErr := ingest.New(client, "containerinsights", "ContainerLogv2")
 		if ingestorErr != nil {
 			Log("Error::mdsd::Unable to create ADX ingestor %s", ingestorErr.Error())
 		} else {
diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
index 9e0935480..67bd61667 100644
--- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
+++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
@@ -248,7 +248,9 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
                       telemetryProps["dsPromUrl"] = @dsPromUrlCount
                     end
                     #telemetry about containerlogs Routing for daemonset
-                    if (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?)
+                    if File.exist?(Constants::AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2_FILENAME)
+                      telemetryProps["containerLogsRoute"] = "v2"
+                    elsif (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?)
                       telemetryProps["containerLogsRoute"] = @containerLogsRoute
                     end
                      #telemetry about health model
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 35e5f9334..0e5099c5e 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -91,4 +91,6 @@ class Constants
 
   #Pod Statuses
   POD_STATUS_TERMINATING = "Terminating"
+
+  AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2_FILENAME = "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2"
 end

From 157ba201f426a0f53193a9eb26a6ad650edc9442 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Tue, 27 Oct 2020 20:17:03 -0700
Subject: [PATCH 37/60] remove hyphen for params in chart (#462)

Merging as it's a simple fix (remove hyphen)
---
 .../templates/omsagent-daemonset-windows.yaml                 | 2 +-
 .../azuremonitor-containers/templates/omsagent-daemonset.yaml | 2 +-
 charts/azuremonitor-containers/values.yaml                    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index c916fadf6..6a309c121 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -46,7 +46,7 @@ spec:
        {{- end }}
        imagePullPolicy: IfNotPresent
        resources:
-{{ toYaml .Values.omsagent.resources.daemonset-windows | indent 9 }}
+{{ toYaml .Values.omsagent.resources.daemonsetwindows | indent 9 }}
        env:
        {{- if ne .Values.omsagent.env.clusterId "<your_cluster_id>" }}
        - name: AKS_RESOURCE_ID
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
index 8af13b6ee..d57c4d82b 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
@@ -40,7 +40,7 @@ spec:
        {{- end }}
        imagePullPolicy: IfNotPresent
        resources:
-{{ toYaml .Values.omsagent.resources.daemonset-linux | indent 9 }}
+{{ toYaml .Values.omsagent.resources.daemonsetlinux | indent 9 }}
        env:
        {{- if ne .Values.omsagent.env.clusterId "<your_cluster_id>" }}
        - name: AKS_RESOURCE_ID
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index fa01c05bd..774e6203f 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -116,14 +116,14 @@ omsagent:
   ## ref: http://kubernetes.io/docs/user-guide/compute-resources/
   ##
   resources:
-    daemonset-linux:
+    daemonsetlinux:
       requests:
         cpu: 75m
         memory: 225Mi
       limits:
         cpu: 150m
         memory: 600Mi
-    daemonset-windows:
+    daemonsetwindows:
       limits:
         cpu: 200m
         memory: 600Mi

From 7c448bc5f561b2a72c33c689dda0db893bd41038 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Tue, 27 Oct 2020 21:22:34 -0700
Subject: [PATCH 38/60] Changes for cutting a new build for ciprod10272020
 release (#460)

---
 ReleaseNotes.md                                  | 10 ++++++++++
 build/version                                    |  6 +++---
 charts/azuremonitor-containers/Chart.yaml        |  2 +-
 charts/azuremonitor-containers/values.yaml       |  6 +++---
 kubernetes/linux/Dockerfile                      |  2 +-
 kubernetes/omsagent.yaml                         | 12 ++++++------
 kubernetes/windows/Dockerfile                    |  2 +-
 scripts/onboarding/managed/enable-monitoring.ps1 |  2 +-
 scripts/onboarding/managed/enable-monitoring.sh  |  2 +-
 scripts/onboarding/managed/upgrade-monitoring.sh |  2 +-
 10 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index e1892d083..eb8e282b9 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -11,6 +11,16 @@ additional questions or comments.
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
 
+### 10/27/2020 -
+##### Version microsoft/oms:ciprod10272020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10272020 (linux)
+##### Version microsoft/oms:win-ciprod10272020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10272020 (windows)
+##### Code change log
+- Activate oneagent in few AKS regions (koreacentral,norwayeast)
+- Disable syslog
+- Fix timeout for Windows daemonset liveness probe
+- Make request == limit for Windows daemonset resources (cpu & memory)
+- Schema v2 for container log (ADX only - applicable only for select customers for piloting)
+
 ### 10/05/2020 -
 ##### Version microsoft/oms:ciprod10052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10052020 (linux)
 ##### Version microsoft/oms:win-ciprod10052020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10052020 (windows)
diff --git a/build/version b/build/version
index 9587328de..71c70020e 100644
--- a/build/version
+++ b/build/version
@@ -2,11 +2,11 @@
 
 # Build Version Information
 
-CONTAINER_BUILDVERSION_MAJOR=10
-CONTAINER_BUILDVERSION_MINOR=1
+CONTAINER_BUILDVERSION_MAJOR=11
+CONTAINER_BUILDVERSION_MINOR=0
 CONTAINER_BUILDVERSION_PATCH=0
 CONTAINER_BUILDVERSION_BUILDNR=0
-CONTAINER_BUILDVERSION_DATE=20201005
+CONTAINER_BUILDVERSION_DATE=20201027
 CONTAINER_BUILDVERSION_STATUS=Developer_Build
 
 #-------------------------------- End of File -----------------------------------
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index 6d45b05d8..bc35690e4 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 appVersion: 7.0.0-1
 description: Helm chart for deploying Azure Monitor container monitoring agent in Kubernetes
 name: azuremonitor-containers
-version: 2.7.7
+version: 2.7.8
 kubeVersion: "^1.10.0-0"
 keywords:
   - monitoring
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 774e6203f..0f07a98c1 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -12,10 +12,10 @@ Azure:
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod10052020"
-    tagWindows: "win-ciprod10052020"
+    tag: "ciprod10272020"
+    tagWindows: "win-ciprod10272020"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "10.1.0-0"
+    dockerProviderVersion: "11.0.0-0"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index f4324a18a..c3428a44a 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod10052020
+ARG IMAGE_TAG=ciprod10272020
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 61f89b808..ca47d898d 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -337,13 +337,13 @@ spec:
         tier: node
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.1.0-0"
+        dockerProviderVersion: "11.0.0-0"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10052020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10272020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -496,13 +496,13 @@ spec:
         rsName: "omsagent-rs"
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.1.0-0"
+        dockerProviderVersion: "11.0.0-0"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10052020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10272020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -642,13 +642,13 @@ spec:
         tier: node-win
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "10.1.0-0"
+        dockerProviderVersion: "11.0.0-0"
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10052020"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10272020"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index c7dee60af..414817559 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod10052020
+ARG IMAGE_TAG=win-ciprod10272020
 
 # Do not split this into multiple RUN!
 # Docker creates a layer for every RUN-Statement
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index 4815dc958..22d34894f 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -60,7 +60,7 @@ $isUsingServicePrincipal = $false
 
 # released chart version in mcr
 $mcr = "mcr.microsoft.com"
-$mcrChartVersion = "2.7.7"
+$mcrChartVersion = "2.7.8"
 $mcrChartRepoPath = "azuremonitor/containerinsights/preview/azuremonitor-containers"
 $helmLocalRepoName = "."
 
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index d7edf49dc..e0d26c370 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -42,7 +42,7 @@ set -o pipefail
 defaultAzureCloud="AzureCloud"
 
 # released chart version in mcr
-mcrChartVersion="2.7.7"
+mcrChartVersion="2.7.8"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 helmLocalRepoName="."
diff --git a/scripts/onboarding/managed/upgrade-monitoring.sh b/scripts/onboarding/managed/upgrade-monitoring.sh
index 23594c7bc..4134d710f 100644
--- a/scripts/onboarding/managed/upgrade-monitoring.sh
+++ b/scripts/onboarding/managed/upgrade-monitoring.sh
@@ -20,7 +20,7 @@ set -e
 set -o pipefail
 
 # released chart version for Azure Arc enabled Kubernetes public preview
-mcrChartVersion="2.7.7"
+mcrChartVersion="2.7.8"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 

From 62b27d79ba9622a939b6d20e33292725bb2e9bef Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Thu, 29 Oct 2020 08:18:07 -0700
Subject: [PATCH 39/60] using latest stable version of msys2 (#465)

---
 kubernetes/windows/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index 414817559..c4545d705 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -10,7 +10,7 @@ ARG IMAGE_TAG=win-ciprod10272020
 RUN powershell -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
 # Fluentd depends on cool.io whose fat gem is only available for Ruby < 2.5, so need to specify --platform ruby when install Ruby > 2.5 and install msys2 to get dev tools
 RUN choco install -y ruby --version 2.6.5.1 --params "'/InstallDir:C:\ruby26'" \
-&& choco install -y msys2 --version 20190524.0.0.20191030 --params "'/NoPath /NoUpdate /InstallDir:C:\ruby26\msys64'" \
+&& choco install -y msys2 --version 20200903.0.0 --params "'/NoPath /NoUpdate /InstallDir:C:\ruby26\msys64'" \
 && choco install -y vim
 
 # gangams - optional MSYS2 update via ridk failing in merged docker file so skipping that since we dont need optional update

From 909cc16348135c31f8d82af130a75f8bc54f7b6f Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 29 Oct 2020 14:48:00 -0700
Subject: [PATCH 40/60] fixing the windows-perf-dups (#466)

---
 source/plugins/ruby/in_win_cadvisor_perf.rb | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/source/plugins/ruby/in_win_cadvisor_perf.rb b/source/plugins/ruby/in_win_cadvisor_perf.rb
index 4e90195e5..9c267cf4f 100644
--- a/source/plugins/ruby/in_win_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_win_cadvisor_perf.rb
@@ -10,7 +10,7 @@ class Win_CAdvisor_Perf_Input < Input
     def initialize
       super
       require "yaml"
-      require 'yajl/json_gem'
+      require "yajl/json_gem"
       require "time"
 
       require_relative "CAdvisorMetricsAPIClient"
@@ -52,8 +52,6 @@ def shutdown
     def enumerate()
       time = Time.now.to_f
       begin
-        eventStream = MultiEventStream.new
-        insightsMetricsEventStream = MultiEventStream.new
         timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs
         timeDifferenceInMinutes = timeDifference / 60
         @@istestvar = ENV["ISTEST"]
@@ -70,6 +68,7 @@ def enumerate()
           @@winNodeQueryTimeTracker = DateTime.now.to_time.to_i
         end
         @@winNodes.each do |winNode|
+          eventStream = MultiEventStream.new
           metricData = CAdvisorMetricsAPIClient.getMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601)
           metricData.each do |record|
             if !record.empty?
@@ -81,7 +80,6 @@ def enumerate()
           router.emit_stream(@tag, eventStream) if eventStream
           router.emit_stream(@mdmtag, eventStream) if eventStream
 
-          
           if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
             $log.info("winCAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}")
           end
@@ -90,6 +88,7 @@ def enumerate()
           begin
             containerGPUusageInsightsMetricsDataItems = []
             containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601))
+            insightsMetricsEventStream = MultiEventStream.new
 
             containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord|
               wrapper = {
@@ -104,12 +103,12 @@ def enumerate()
             router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
             if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
               $log.info("winCAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-            end 
+            end
           rescue => errorStr
             $log.warn "Failed when processing GPU Usage metrics in_win_cadvisor_perf : #{errorStr}"
             $log.debug_backtrace(errorStr.backtrace)
             ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-          end 
+          end
           #end GPU InsightsMetrics items
 
         end

From d481c066df67ce9cf76d163c0776502f3989aea1 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Fri, 6 Nov 2020 00:02:52 -0800
Subject: [PATCH 41/60] chart updates related to new microsoft/charts repo
 (#467)

---
 charts/azuremonitor-containers/README.md       | 18 ++++++++++--------
 .../templates/NOTES.txt                        |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/charts/azuremonitor-containers/README.md b/charts/azuremonitor-containers/README.md
index 3b357ffd5..469fac94a 100644
--- a/charts/azuremonitor-containers/README.md
+++ b/charts/azuremonitor-containers/README.md
@@ -29,6 +29,8 @@ Monitoring your Kubernetes cluster and containers is critical, especially when r
 
 ## Installing the Chart
 
+> Note: If you want to customize the chart, fork the chart code in https://github.com/microsoft/Docker-Provider/tree/ci_prod/charts/azuremonitor-containers
+
 > Note: `--name` flag not required in Helm3 since this flag is deprecated
 
 > Note: use `omsagent.proxy` parameter to set the proxy endpoint if your K8s cluster configured behind the proxy. Refer to [configure proxy](#Configuring-Proxy-Endpoint) for more details about  proxy.
@@ -36,25 +38,25 @@ Monitoring your Kubernetes cluster and containers is critical, especially when r
 ### To Use Azure Log Analytics Workspace in Public Cloud
 
 ```bash
-$ helm repo add incubator https://kubernetes-charts-incubator.storage.googleapis.com/
+$ helm repo add microsoft https://microsoft.github.io/charts/repo
 $ helm install --name azmon-containers-release-1 \
---set omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<my_prod_cluster>  incubator/azuremonitor-containers
+--set omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<my_prod_cluster>  microsoft/azuremonitor-containers
 ```
 
 ### To Use Azure Log Analytics Workspace in Azure China Cloud
 
 ```bash
-$ helm repo add incubator https://kubernetes-charts-incubator.storage.googleapis.com/
+$ helm repo add microsoft https://microsoft.github.io/charts/repo
 $ helm install --name azmon-containers-release-1 \
---set omsagent.domain=opinsights.azure.cn,omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<your_cluster_name>  incubator/azuremonitor-containers
+--set omsagent.domain=opinsights.azure.cn,omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<your_cluster_name>  microsoft/azuremonitor-containers
 ```
 
 ### To Use Azure Log Analytics Workspace in Azure US Government Cloud
 
 ```bash
-$ helm repo add incubator https://kubernetes-charts-incubator.storage.googleapis.com/
+$ helm repo add microsoft https://microsoft.github.io/charts/repo
 $ helm install --name azmon-containers-release-1 \
---set omsagent.domain=opinsights.azure.us,omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<your_cluster_name>  incubator/azuremonitor-containers
+--set omsagent.domain=opinsights.azure.us,omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<your_cluster_name>  microsoft/azuremonitor-containers
 ```
 
 ## Upgrading an existing Release to a new version
@@ -112,13 +114,13 @@ Specify each parameter using the `--set key=value[,key=value]` argument to `helm
 
 $ helm install --name myrelease-1 \
 --set omsagent.secret.wsid=<your_workspace_id>,omsagent.secret.key=<your_workspace_key>,omsagent.env.clusterName=<your_cluster_name>
-  incubator/azuremonitor-containers
+  microsoft/azuremonitor-containers
 ```
 Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example,
 
 ```bash
 
-$ helm install --name myrelease-1 -f values.yaml incubator/azuremonitor-containers
+$ helm install --name myrelease-1 -f values.yaml microsoft/azuremonitor-containers
 
 ```
 
diff --git a/charts/azuremonitor-containers/templates/NOTES.txt b/charts/azuremonitor-containers/templates/NOTES.txt
index 372cecb95..48ebf33fc 100644
--- a/charts/azuremonitor-containers/templates/NOTES.txt
+++ b/charts/azuremonitor-containers/templates/NOTES.txt
@@ -29,7 +29,7 @@ This deployment will not complete. To proceed, run
    --set omsagent.secret.wsid=<your_workspace_id> \
    --set omsagent.secret.key=<your_workspace_key> \
    --set omsagent.env.clusterName=<your_cluster_name> \
-   incubator/azuremonitor-containers
+   microsoft/azuremonitor-containers
 
 {{- else -}}
 

From aff1e13c240836cea73f3913f098b2737f186b89 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Mon, 9 Nov 2020 13:18:02 -0800
Subject: [PATCH 42/60] Changes for creating 11092020 release (#468)

---
 ReleaseNotes.md                                  |  6 ++++++
 build/version                                    |  4 ++--
 charts/azuremonitor-containers/Chart.yaml        |  2 +-
 charts/azuremonitor-containers/values.yaml       |  6 +++---
 kubernetes/linux/Dockerfile                      |  2 +-
 kubernetes/omsagent.yaml                         | 12 ++++++------
 kubernetes/windows/Dockerfile                    |  2 +-
 scripts/onboarding/managed/enable-monitoring.ps1 |  2 +-
 scripts/onboarding/managed/enable-monitoring.sh  |  2 +-
 scripts/onboarding/managed/upgrade-monitoring.sh |  2 +-
 10 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index eb8e282b9..ddfd01314 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -11,6 +11,12 @@ additional questions or comments.
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
 
+### 11/09/2020 -
+##### Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020 (linux)
+##### Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod11092020 (windows)
+##### Code change log
+- Fix for duplicate windows metrics
+
 ### 10/27/2020 -
 ##### Version microsoft/oms:ciprod10272020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10272020 (linux)
 ##### Version microsoft/oms:win-ciprod10272020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10052020 (windows)
diff --git a/build/version b/build/version
index 71c70020e..a8b78ecac 100644
--- a/build/version
+++ b/build/version
@@ -5,8 +5,8 @@
 CONTAINER_BUILDVERSION_MAJOR=11
 CONTAINER_BUILDVERSION_MINOR=0
 CONTAINER_BUILDVERSION_PATCH=0
-CONTAINER_BUILDVERSION_BUILDNR=0
-CONTAINER_BUILDVERSION_DATE=20201027
+CONTAINER_BUILDVERSION_BUILDNR=1
+CONTAINER_BUILDVERSION_DATE=20201109
 CONTAINER_BUILDVERSION_STATUS=Developer_Build
 
 #-------------------------------- End of File -----------------------------------
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index bc35690e4..987841f77 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 appVersion: 7.0.0-1
 description: Helm chart for deploying Azure Monitor container monitoring agent in Kubernetes
 name: azuremonitor-containers
-version: 2.7.8
+version: 2.7.9
 kubeVersion: "^1.10.0-0"
 keywords:
   - monitoring
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 0f07a98c1..76ea0a26d 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -12,10 +12,10 @@ Azure:
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod10272020"
-    tagWindows: "win-ciprod10272020"
+    tag: "ciprod11092020"
+    tagWindows: "win-ciprod11092020"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "11.0.0-0"
+    dockerProviderVersion: "11.0.0-1"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index c3428a44a..d04e86128 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod10272020
+ARG IMAGE_TAG=ciprod11092020
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index ca47d898d..7d07eafcd 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -337,13 +337,13 @@ spec:
         tier: node
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "11.0.0-0"
+        dockerProviderVersion: "11.0.0-1"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10272020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -496,13 +496,13 @@ spec:
         rsName: "omsagent-rs"
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "11.0.0-0"
+        dockerProviderVersion: "11.0.0-1"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10272020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -642,13 +642,13 @@ spec:
         tier: node-win
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "11.0.0-0"
+        dockerProviderVersion: "11.0.0-1"
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10272020"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod11092020"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index c4545d705..10ea235b2 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod10272020
+ARG IMAGE_TAG=win-ciprod11092020
 
 # Do not split this into multiple RUN!
 # Docker creates a layer for every RUN-Statement
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index 22d34894f..b052f22c5 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -60,7 +60,7 @@ $isUsingServicePrincipal = $false
 
 # released chart version in mcr
 $mcr = "mcr.microsoft.com"
-$mcrChartVersion = "2.7.8"
+$mcrChartVersion = "2.7.9"
 $mcrChartRepoPath = "azuremonitor/containerinsights/preview/azuremonitor-containers"
 $helmLocalRepoName = "."
 
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index e0d26c370..bb6974258 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -42,7 +42,7 @@ set -o pipefail
 defaultAzureCloud="AzureCloud"
 
 # released chart version in mcr
-mcrChartVersion="2.7.8"
+mcrChartVersion="2.7.9"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 helmLocalRepoName="."
diff --git a/scripts/onboarding/managed/upgrade-monitoring.sh b/scripts/onboarding/managed/upgrade-monitoring.sh
index 4134d710f..11ecf6819 100644
--- a/scripts/onboarding/managed/upgrade-monitoring.sh
+++ b/scripts/onboarding/managed/upgrade-monitoring.sh
@@ -20,7 +20,7 @@ set -e
 set -o pipefail
 
 # released chart version for Azure Arc enabled Kubernetes public preview
-mcrChartVersion="2.7.8"
+mcrChartVersion="2.7.9"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 

From ca18850046fd54f7830bbe2addb51039928c3514 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Mon, 9 Nov 2020 18:47:36 -0800
Subject: [PATCH 43/60] MDM exception aggregation (#470)

---
 source/plugins/ruby/constants.rb | 112 ++++++++++++++++---------------
 source/plugins/ruby/out_mdm.rb   |  51 ++++++++++++--
 2 files changed, 104 insertions(+), 59 deletions(-)

diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 0e5099c5e..079584c7b 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -1,61 +1,61 @@
 # frozen_string_literal: true
 
 class Constants
-    INSIGHTSMETRICS_TAGS_ORIGIN = "container.azm.ms"
-    INSIGHTSMETRICS_TAGS_CLUSTERID = "container.azm.ms/clusterId"
-    INSIGHTSMETRICS_TAGS_CLUSTERNAME = "container.azm.ms/clusterName"
-    INSIGHTSMETRICS_TAGS_GPU_VENDOR = "gpuVendor"
-    INSIGHTSMETRICS_TAGS_GPU_NAMESPACE = "container.azm.ms/gpu"
-    INSIGHTSMETRICS_TAGS_GPU_MODEL = "gpuModel"
-    INSIGHTSMETRICS_TAGS_GPU_ID = "gpuId"
-    INSIGHTSMETRICS_TAGS_CONTAINER_NAME = "containerName"
-    INSIGHTSMETRICS_TAGS_CONTAINER_ID = "containerName"
-    INSIGHTSMETRICS_TAGS_K8SNAMESPACE = "k8sNamespace"
-    INSIGHTSMETRICS_TAGS_CONTROLLER_NAME = "controllerName"
-    INSIGHTSMETRICS_TAGS_CONTROLLER_KIND = "controllerKind"
-    INSIGHTSMETRICS_TAGS_POD_UID = "podUid"
-    INSIGTHTSMETRICS_TAGS_PV_NAMESPACE = "container.azm.ms/pv"
-    INSIGHTSMETRICS_TAGS_PVC_NAME = "pvcName"
-    INSIGHTSMETRICS_TAGS_PVC_NAMESPACE = "pvcNamespace"
-    INSIGHTSMETRICS_TAGS_POD_NAME = "podName"
-    INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES = "pvCapacityBytes"
-    INSIGHTSMETRICS_TAGS_VOLUME_NAME = "volumeName"
-    INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics"
-    REASON_OOM_KILLED = "oomkilled"
-    #Kubestate (common)
-    INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE = "container.azm.ms/kubestate"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME = "creationTime"
-    #Kubestate (deployments)
-    INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE = "kube_deployment_status_replicas_ready"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME = "deployment"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_CREATIONTIME = "creationTime"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY = "deploymentStrategy"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS = "spec_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED = "status_replicas_updated"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE = "status_replicas_available"
-    #Kubestate (HPA)
-    INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE = "kube_hpa_status_current_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME = "hpa"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS = "spec_max_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS = "spec_min_replicas"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND = "targetKind"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME = "targetName"
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS = "status_desired_replicas"
-    
-    INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME = "lastScaleTime"
-    # MDM Metric names
-    MDM_OOM_KILLED_CONTAINER_COUNT = "oomKilledContainerCount"
-    MDM_CONTAINER_RESTART_COUNT = "restartingContainerCount"
-    MDM_POD_READY_PERCENTAGE = "podReadyPercentage"
-    MDM_STALE_COMPLETED_JOB_COUNT = "completedJobsCount"
-    MDM_DISK_USED_PERCENTAGE = "diskUsedPercentage"
-    MDM_CONTAINER_CPU_UTILIZATION_METRIC = "cpuExceededPercentage"
-    MDM_CONTAINER_MEMORY_RSS_UTILIZATION_METRIC = "memoryRssExceededPercentage"
-    MDM_CONTAINER_MEMORY_WORKING_SET_UTILIZATION_METRIC = "memoryWorkingSetExceededPercentage"
-    MDM_PV_UTILIZATION_METRIC = "pvUsageExceededPercentage"
-    MDM_NODE_CPU_USAGE_PERCENTAGE = "cpuUsagePercentage"
-    MDM_NODE_MEMORY_RSS_PERCENTAGE = "memoryRssPercentage"
-    MDM_NODE_MEMORY_WORKING_SET_PERCENTAGE = "memoryWorkingSetPercentage"
+  INSIGHTSMETRICS_TAGS_ORIGIN = "container.azm.ms"
+  INSIGHTSMETRICS_TAGS_CLUSTERID = "container.azm.ms/clusterId"
+  INSIGHTSMETRICS_TAGS_CLUSTERNAME = "container.azm.ms/clusterName"
+  INSIGHTSMETRICS_TAGS_GPU_VENDOR = "gpuVendor"
+  INSIGHTSMETRICS_TAGS_GPU_NAMESPACE = "container.azm.ms/gpu"
+  INSIGHTSMETRICS_TAGS_GPU_MODEL = "gpuModel"
+  INSIGHTSMETRICS_TAGS_GPU_ID = "gpuId"
+  INSIGHTSMETRICS_TAGS_CONTAINER_NAME = "containerName"
+  INSIGHTSMETRICS_TAGS_CONTAINER_ID = "containerName"
+  INSIGHTSMETRICS_TAGS_K8SNAMESPACE = "k8sNamespace"
+  INSIGHTSMETRICS_TAGS_CONTROLLER_NAME = "controllerName"
+  INSIGHTSMETRICS_TAGS_CONTROLLER_KIND = "controllerKind"
+  INSIGHTSMETRICS_TAGS_POD_UID = "podUid"
+  INSIGTHTSMETRICS_TAGS_PV_NAMESPACE = "container.azm.ms/pv"
+  INSIGHTSMETRICS_TAGS_PVC_NAME = "pvcName"
+  INSIGHTSMETRICS_TAGS_PVC_NAMESPACE = "pvcNamespace"
+  INSIGHTSMETRICS_TAGS_POD_NAME = "podName"
+  INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES = "pvCapacityBytes"
+  INSIGHTSMETRICS_TAGS_VOLUME_NAME = "volumeName"
+  INSIGHTSMETRICS_FLUENT_TAG = "oms.api.InsightsMetrics"
+  REASON_OOM_KILLED = "oomkilled"
+  #Kubestate (common)
+  INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE = "container.azm.ms/kubestate"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME = "creationTime"
+  #Kubestate (deployments)
+  INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE = "kube_deployment_status_replicas_ready"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME = "deployment"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_CREATIONTIME = "creationTime"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY = "deploymentStrategy"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS = "spec_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED = "status_replicas_updated"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE = "status_replicas_available"
+  #Kubestate (HPA)
+  INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE = "kube_hpa_status_current_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME = "hpa"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS = "spec_max_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS = "spec_min_replicas"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND = "targetKind"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME = "targetName"
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS = "status_desired_replicas"
+
+  INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME = "lastScaleTime"
+  # MDM Metric names
+  MDM_OOM_KILLED_CONTAINER_COUNT = "oomKilledContainerCount"
+  MDM_CONTAINER_RESTART_COUNT = "restartingContainerCount"
+  MDM_POD_READY_PERCENTAGE = "podReadyPercentage"
+  MDM_STALE_COMPLETED_JOB_COUNT = "completedJobsCount"
+  MDM_DISK_USED_PERCENTAGE = "diskUsedPercentage"
+  MDM_CONTAINER_CPU_UTILIZATION_METRIC = "cpuExceededPercentage"
+  MDM_CONTAINER_MEMORY_RSS_UTILIZATION_METRIC = "memoryRssExceededPercentage"
+  MDM_CONTAINER_MEMORY_WORKING_SET_UTILIZATION_METRIC = "memoryWorkingSetExceededPercentage"
+  MDM_PV_UTILIZATION_METRIC = "pvUsageExceededPercentage"
+  MDM_NODE_CPU_USAGE_PERCENTAGE = "cpuUsagePercentage"
+  MDM_NODE_MEMORY_RSS_PERCENTAGE = "memoryRssPercentage"
+  MDM_NODE_MEMORY_WORKING_SET_PERCENTAGE = "memoryWorkingSetPercentage"
 
   CONTAINER_TERMINATED_RECENTLY_IN_MINUTES = 5
   OBJECT_NAME_K8S_CONTAINER = "K8SContainer"
@@ -88,6 +88,8 @@ class Constants
   KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
   ZERO_FILL_METRICS_INTERVAL_IN_MINUTES = 30
   MDM_TIME_SERIES_FLUSHED_IN_LAST_HOUR = "MdmTimeSeriesFlushedInLastHour"
+  MDM_EXCEPTION_TELEMETRY_METRIC = "AKSCustomMetricsMdmExceptions"
+  MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL = 30
 
   #Pod Statuses
   POD_STATUS_TERMINATING = "Terminating"
diff --git a/source/plugins/ruby/out_mdm.rb b/source/plugins/ruby/out_mdm.rb
index 1c805255a..6238eb51a 100644
--- a/source/plugins/ruby/out_mdm.rb
+++ b/source/plugins/ruby/out_mdm.rb
@@ -50,6 +50,10 @@ def initialize
       @cluster_identity = nil
       @isArcK8sCluster = false
       @get_access_token_backoff_expiry = Time.now
+
+      @mdm_exceptions_hash = {}
+      @mdm_exceptions_count = 0
+      @mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
     end
 
     def configure(conf)
@@ -221,10 +225,49 @@ def format(tag, time, record)
       end
     end
 
+    # Records one occurrence of an exception raised while writing/posting to MDM, keyed by its message.
+    # The tallies are sent as a single metric (and reset) by flush_mdm_exception_telemetry.
+    def exception_aggregator(error)
+      begin
+        errorKey = error.to_s
+        # Repeats of the same message share one counter; unseen messages start from zero
+        @mdm_exceptions_hash[errorKey] = @mdm_exceptions_hash.fetch(errorKey, 0) + 1
+        # Running total across all messages, sent as the metric value for the flush interval
+        @mdm_exceptions_count += 1
+      rescue => aggregationError
+        # Aggregation is best effort: log and report the failure instead of re-raising it
+        @log.info "Error in MDM exception_aggregator method: #{aggregationError}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(aggregationError)
+      end
+    end
+
+    # Sends the aggregated MDM exception tallies as one metric once the flush interval has elapsed, then resets them
+    def flush_mdm_exception_telemetry
+      begin
+        secondsSinceLastFlush = (DateTime.now.to_time.to_i - @mdm_exception_telemetry_time_tracker).abs
+        if (secondsSinceLastFlush / 60 >= Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL)
+          telemetryProperties = {
+            "ExceptionsHashForFlushInterval" => @mdm_exceptions_hash.to_json,
+            "FlushInterval" => Constants::MDM_EXCEPTIONS_METRIC_FLUSH_INTERVAL,
+          }
+          ApplicationInsightsUtility.sendMetricTelemetry(Constants::MDM_EXCEPTION_TELEMETRY_METRIC, @mdm_exceptions_count, telemetryProperties)
+          # Begin a new aggregation window
+          @mdm_exceptions_count = 0
+          @mdm_exceptions_hash = {}
+          @mdm_exception_telemetry_time_tracker = DateTime.now.to_time.to_i
+        end
+      rescue => flushError
+        @log.info "Error in flush_mdm_exception_telemetry method: #{flushError}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(flushError)
+      end
+    end
+
     # This method is called every flush interval. Send the buffer chunk to MDM.
     # 'chunk' is a buffer chunk that includes multiple formatted records
     def write(chunk)
       begin
+        # Adding this before trying to flush out metrics, since adding after can lead to metrics never being sent
+        flush_mdm_exception_telemetry
         if (!@first_post_attempt_made || (Time.now > @last_post_attempt_time + retry_mdm_post_wait_minutes * 60)) && @can_send_data_to_mdm
           post_body = []
           chunk.msgpack_each { |(tag, record)|
@@ -247,7 +290,8 @@ def write(chunk)
           end
         end
       rescue Exception => e
-        ApplicationInsightsUtility.sendExceptionTelemetry(e)
+        # Adding exceptions to hash to aggregate and send telemetry for all write errors
+        exception_aggregator(e)
         @log.info "Exception when writing to MDM: #{e}"
         raise e
       end
@@ -282,7 +326,6 @@ def send_to_mdm(post_body)
         else
           @log.info "Failed to Post Metrics to MDM : #{e} Response: #{response}"
         end
-        #@log.info "MDM request : #{post_body}"
         @log.debug_backtrace(e.backtrace)
         if !response.code.empty? && response.code == 403.to_s
           @log.info "Response Code #{response.code} Updating @last_post_attempt_time"
@@ -297,15 +340,15 @@ def send_to_mdm(post_body)
           @log.info "HTTPServerException when POSTing Metrics to MDM #{e} Response: #{response}"
           raise e
         end
+        # Adding exceptions to hash to aggregate and send telemetry for all 400 error codes
+        exception_aggregator(e)
       rescue Errno::ETIMEDOUT => e
         @log.info "Timed out when POSTing Metrics to MDM : #{e} Response: #{response}"
         @log.debug_backtrace(e.backtrace)
-        ApplicationInsightsUtility.sendExceptionTelemetry(e)
         raise e
       rescue Exception => e
         @log.info "Exception POSTing Metrics to MDM : #{e} Response: #{response}"
         @log.debug_backtrace(e.backtrace)
-        ApplicationInsightsUtility.sendExceptionTelemetry(e)
         raise e
       end
     end

From 18c27dda3e8af3187502f4ecfc9475dea74f3ce5 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Mon, 23 Nov 2020 08:37:38 -0800
Subject: [PATCH 44/60] grwehner/mdm custom metric regions (#471)

Remove custom metrics region check for public cloud
---
 build/linux/installer/conf/container.conf            |  2 --
 build/linux/installer/conf/kube.conf                 |  3 ---
 .../templates/omsagent-rs-configmap.yaml             |  3 ---
 kubernetes/linux/main.sh                             | 11 +++++++++++
 kubernetes/omsagent.yaml                             |  3 ---
 kubernetes/windows/main.ps1                          |  6 ++++++
 .../preview/health/omsagent-template-aks-engine.yaml |  2 --
 scripts/preview/health/omsagent-template.yaml        |  2 --
 source/plugins/ruby/CustomMetricsUtils.rb            | 12 +++---------
 source/plugins/ruby/filter_cadvisor2mdm.rb           |  3 +--
 source/plugins/ruby/filter_inventory2mdm.rb          |  3 +--
 source/plugins/ruby/filter_telegraf2mdm.rb           |  3 +--
 source/plugins/ruby/in_kube_podinventory.rb          |  3 +--
 source/plugins/ruby/podinventory_to_mdm.rb           |  4 ++--
 14 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/build/linux/installer/conf/container.conf b/build/linux/installer/conf/container.conf
index f7e6e1da9..958a85eb6 100644
--- a/build/linux/installer/conf/container.conf
+++ b/build/linux/installer/conf/container.conf
@@ -45,14 +45,12 @@
 #custom_metrics_mdm filter plugin
 <filter mdm.cadvisorperf**>
   type filter_cadvisor2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
   metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,memoryRssBytes,pvUsedBytes
   log_level info
 </filter>
 
 <filter oms.mdm.container.perf.telegraf**>
   type filter_telegraf2mdm
-  custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
   log_level debug
 </filter>
 
diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf
index dbb4db0da..121472eba 100644
--- a/build/linux/installer/conf/kube.conf
+++ b/build/linux/installer/conf/kube.conf
@@ -13,7 +13,6 @@
      tag oms.containerinsights.KubePodInventory
      run_interval 60
      log_level debug
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
     </source>
 
     #Kubernetes events
@@ -66,14 +65,12 @@
 
     <filter mdm.kubenodeinventory**>
      type filter_inventory2mdm
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      log_level info
     </filter>
 
     #custom_metrics_mdm filter plugin for perf data from windows nodes
     <filter mdm.cadvisorperf**>
      type filter_cadvisor2mdm
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
      log_level info
     </filter>
diff --git a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
index 475b17a46..e1bc969cb 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
@@ -18,7 +18,6 @@ data:
       tag oms.containerinsights.KubePodInventory
       run_interval 60
       log_level debug
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      </source>
 
      #Kubernetes events
@@ -70,14 +69,12 @@ data:
      </source>
      <filter mdm.kubenodeinventory**>
       type filter_inventory2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       log_level info
      </filter>
 
      # custom_metrics_mdm filter plugin for perf data from windows nodes
      <filter mdm.cadvisorperf**>
       type filter_cadvisor2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
       log_level info
      </filter>
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index b093eb74b..a2ba6a1d1 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -150,6 +150,17 @@ else
       echo "LA Onboarding:Workspace Id not mounted, skipping the telemetry check"
 fi
 
+# Derive CLOUD_ENVIRONMENT (unknown/public/national) from the workspace domain and export it.
+if [ -z "$domain" ]; then
+  CLOUD_ENVIRONMENT="unknown"
+elif [ "$domain" == "opinsights.azure.com" ]; then
+  CLOUD_ENVIRONMENT="public"
+else
+  CLOUD_ENVIRONMENT="national"
+fi
+export CLOUD_ENVIRONMENT=$CLOUD_ENVIRONMENT
+echo "export CLOUD_ENVIRONMENT=$CLOUD_ENVIRONMENT" >> ~/.bashrc
+
 #Parse the configmap to set the right environment variables.
 /opt/microsoft/omsagent/ruby/bin/ruby tomlparser.rb
 
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 7d07eafcd..2155361e9 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -64,7 +64,6 @@ data:
       tag oms.containerinsights.KubePodInventory
       run_interval 60
       log_level debug
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
      </source>
 
      #Kubernetes events
@@ -117,14 +116,12 @@ data:
 
      <filter mdm.kubenodeinventory**>
       type filter_inventory2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       log_level info
      </filter>
 
      #custom_metrics_mdm filter plugin for perf data from windows nodes
      <filter mdm.cadvisorperf**>
       type filter_cadvisor2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westeurope,southafricanorth,centralus,northcentralus,eastus2,koreacentral,eastasia,centralindia,uksouth,canadacentral,francecentral,japaneast,australiaeast,eastus2,westus,australiasoutheast,brazilsouth,germanywestcentral,northcentralus,switzerlandnorth
       metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes,pvUsedBytes
       log_level info
      </filter>
diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1
index 2e8659601..d32e5068a 100644
--- a/kubernetes/windows/main.ps1
+++ b/kubernetes/windows/main.ps1
@@ -43,15 +43,21 @@ function Start-FileSystemWatcher {
 
 function Set-EnvironmentVariables {
     $domain = "opinsights.azure.com"
+    $cloud_environment = "public"
     if (Test-Path /etc/omsagent-secret/DOMAIN) {
         # TODO: Change to omsagent-secret before merging
         $domain = Get-Content /etc/omsagent-secret/DOMAIN
+        $cloud_environment = "national"
     }
 
     # Set DOMAIN
     [System.Environment]::SetEnvironmentVariable("DOMAIN", $domain, "Process")
     [System.Environment]::SetEnvironmentVariable("DOMAIN", $domain, "Machine")
 
+    # Set CLOUD_ENVIRONMENT
+    [System.Environment]::SetEnvironmentVariable("CLOUD_ENVIRONMENT", $cloud_environment, "Process")
+    [System.Environment]::SetEnvironmentVariable("CLOUD_ENVIRONMENT", $cloud_environment, "Machine")
+
     $wsID = ""
     if (Test-Path /etc/omsagent-secret/WSID) {
         # TODO: Change to omsagent-secret before merging
diff --git a/scripts/preview/health/omsagent-template-aks-engine.yaml b/scripts/preview/health/omsagent-template-aks-engine.yaml
index 5526602c0..5e063fd54 100644
--- a/scripts/preview/health/omsagent-template-aks-engine.yaml
+++ b/scripts/preview/health/omsagent-template-aks-engine.yaml
@@ -108,14 +108,12 @@ data:
 
     <filter mdm.kubepodinventory** mdm.kubenodeinventory**>
      type filter_inventory2mdm
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope
      log_level info
     </filter>
 
     # custom_metrics_mdm filter plugin for perf data from windows nodes
     <filter mdm.cadvisorperf**>
      type filter_cadvisor2mdm
-     custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope
      metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
      log_level info
     </filter>
diff --git a/scripts/preview/health/omsagent-template.yaml b/scripts/preview/health/omsagent-template.yaml
index 6e3a52020..e58e9c33f 100644
--- a/scripts/preview/health/omsagent-template.yaml
+++ b/scripts/preview/health/omsagent-template.yaml
@@ -108,14 +108,12 @@ data:
 
      <filter mdm.kubepodinventory** mdm.kubenodeinventory**>
       type filter_inventory2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope
       log_level info
      </filter>
 
      # custom_metrics_mdm filter plugin for perf data from windows nodes
      <filter mdm.cadvisorperf**>
       type filter_cadvisor2mdm
-      custom_metrics_azure_regions eastus,southcentralus,westcentralus,westus2,southeastasia,northeurope,westEurope
       metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
       log_level info
      </filter>
diff --git a/source/plugins/ruby/CustomMetricsUtils.rb b/source/plugins/ruby/CustomMetricsUtils.rb
index a19580630..220313e6b 100644
--- a/source/plugins/ruby/CustomMetricsUtils.rb
+++ b/source/plugins/ruby/CustomMetricsUtils.rb
@@ -6,21 +6,15 @@ def initialize
     end
 
     class << self
-        def check_custom_metrics_availability(custom_metric_regions)
+        def check_custom_metrics_availability
             aks_region = ENV['AKS_REGION']
             aks_resource_id = ENV['AKS_RESOURCE_ID']
+            aks_cloud_environment = ENV['CLOUD_ENVIRONMENT']
             if aks_region.to_s.empty? || aks_resource_id.to_s.empty?
                 return false # This will also take care of AKS-Engine Scenario. AKS_REGION/AKS_RESOURCE_ID is not set for AKS-Engine. Only ACS_RESOURCE_NAME is set
             end
             
-            custom_metrics_regions_arr = custom_metric_regions.split(',')
-            custom_metrics_regions_hash = custom_metrics_regions_arr.map {|x| [x.downcase,true]}.to_h
-
-            if custom_metrics_regions_hash.key?(aks_region.downcase)
-                true
-            else 
-                false
-            end
+            return aks_cloud_environment.to_s.downcase == 'public'
         end
     end
 end
\ No newline at end of file
diff --git a/source/plugins/ruby/filter_cadvisor2mdm.rb b/source/plugins/ruby/filter_cadvisor2mdm.rb
index 3bc674ea8..2423ad024 100644
--- a/source/plugins/ruby/filter_cadvisor2mdm.rb
+++ b/source/plugins/ruby/filter_cadvisor2mdm.rb
@@ -15,7 +15,6 @@ class CAdvisor2MdmFilter < Filter
 
     config_param :enable_log, :integer, :default => 0
     config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log"
-    config_param :custom_metrics_azure_regions, :string
     config_param :metrics_to_collect, :string, :default => "Constants::CPU_USAGE_NANO_CORES,Constants::MEMORY_WORKING_SET_BYTES,Constants::MEMORY_RSS_BYTES,Constants::PV_USED_BYTES"
 
     @@hostName = (OMS::Common.get_hostname)
@@ -42,7 +41,7 @@ def configure(conf)
     def start
       super
       begin
-        @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions)
+        @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability
         @metrics_to_collect_hash = build_metrics_hash
         @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}"
         @@containerResourceUtilTelemetryTimeTracker = DateTime.now.to_time.to_i
diff --git a/source/plugins/ruby/filter_inventory2mdm.rb b/source/plugins/ruby/filter_inventory2mdm.rb
index b5ef587ff..38ccab885 100644
--- a/source/plugins/ruby/filter_inventory2mdm.rb
+++ b/source/plugins/ruby/filter_inventory2mdm.rb
@@ -13,7 +13,6 @@ class Inventory2MdmFilter < Filter
 
 		config_param :enable_log, :integer, :default => 0
         config_param :log_path, :string, :default => '/var/opt/microsoft/docker-cimprov/log/filter_inventory2mdm.log'
-        config_param :custom_metrics_azure_regions, :string
 
         @@node_count_metric_name = 'nodesCount'
         @@pod_count_metric_name = 'podCount'
@@ -98,7 +97,7 @@ def configure(conf)
 
         def start
             super
-            @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions)
+            @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability
             @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}"
         end
 
diff --git a/source/plugins/ruby/filter_telegraf2mdm.rb b/source/plugins/ruby/filter_telegraf2mdm.rb
index 98d258ea5..88ae428d1 100644
--- a/source/plugins/ruby/filter_telegraf2mdm.rb
+++ b/source/plugins/ruby/filter_telegraf2mdm.rb
@@ -15,7 +15,6 @@ class Telegraf2MdmFilter < Filter
 
     config_param :enable_log, :integer, :default => 0
     config_param :log_path, :string, :default => "/var/opt/microsoft/docker-cimprov/log/filter_telegraf2mdm.log"
-    config_param :custom_metrics_azure_regions, :string
 
     @process_incoming_stream = true
 
@@ -36,7 +35,7 @@ def configure(conf)
     def start
       super
       begin
-        @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(@custom_metrics_azure_regions)
+        @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability
         @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}"
       rescue => errorStr
         @log.info "Error initializing plugin #{errorStr}"
diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb
index 4880d80e7..bba3e920f 100644
--- a/source/plugins/ruby/in_kube_podinventory.rb
+++ b/source/plugins/ruby/in_kube_podinventory.rb
@@ -36,11 +36,10 @@ def initialize
 
     config_param :run_interval, :time, :default => 60
     config_param :tag, :string, :default => "oms.containerinsights.KubePodInventory"
-    config_param :custom_metrics_azure_regions, :string
 
     def configure(conf)
       super
-      @inventoryToMdmConvertor = Inventory2MdmConvertor.new(@custom_metrics_azure_regions)
+      @inventoryToMdmConvertor = Inventory2MdmConvertor.new()
     end
 
     def start
diff --git a/source/plugins/ruby/podinventory_to_mdm.rb b/source/plugins/ruby/podinventory_to_mdm.rb
index 834515969..77370e284 100644
--- a/source/plugins/ruby/podinventory_to_mdm.rb
+++ b/source/plugins/ruby/podinventory_to_mdm.rb
@@ -80,14 +80,14 @@ class Inventory2MdmConvertor
   @@pod_phase_values = ["Running", "Pending", "Succeeded", "Failed", "Unknown"]
   @process_incoming_stream = false
 
-  def initialize(custom_metrics_azure_regions)
+  def initialize()
     @log_path = "/var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log"
     @log = Logger.new(@log_path, 1, 5000000)
     @pod_count_hash = {}
     @no_phase_dim_values_hash = {}
     @pod_count_by_phase = {}
     @pod_uids = {}
-    @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability(custom_metrics_azure_regions)
+    @process_incoming_stream = CustomMetricsUtils.check_custom_metrics_availability
     @log.debug "After check_custom_metrics_availability process_incoming_stream #{@process_incoming_stream}"
     @log.debug { "Starting podinventory_to_mdm plugin" }
   end

From a5c12e9a5e28dc27b8288d21bc72b5937b93e370 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 3 Dec 2020 17:20:51 -0800
Subject: [PATCH 45/60] updaitng rs limit to 1gb (#474)

---
 charts/azuremonitor-containers/values.yaml | 2 +-
 kubernetes/omsagent.yaml                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 76ea0a26d..e8acda20e 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -133,4 +133,4 @@ omsagent:
         memory: 250Mi
       limits:
         cpu: 1
-        memory: 750Mi
+        memory: 1Gi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 2155361e9..296de02bf 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -504,7 +504,7 @@ spec:
           resources:
             limits:
               cpu: 1
-              memory: 750Mi
+              memory: 1Gi
             requests:
               cpu: 150m
               memory: 250Mi

From 7453fd4e3d8a918a70683a5a3a8344bd550a5349 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Thu, 10 Dec 2020 10:45:09 -0800
Subject: [PATCH 46/60] grwehner/pv inventory (#455)

Add fluentd plugin to request persistent volume info from the kubernetes api and send to LA
---
 build/linux/installer/conf/kube.conf          |  23 ++
 .../installer/datafiles/base_container.data   |   1 +
 kubernetes/omsagent.yaml                      |  24 ++
 source/plugins/ruby/constants.rb              |   4 +
 source/plugins/ruby/in_kube_pvinventory.rb    | 253 ++++++++++++++++++
 5 files changed, 305 insertions(+)
 create mode 100644 source/plugins/ruby/in_kube_pvinventory.rb

diff --git a/build/linux/installer/conf/kube.conf b/build/linux/installer/conf/kube.conf
index 121472eba..fb566c360 100644
--- a/build/linux/installer/conf/kube.conf
+++ b/build/linux/installer/conf/kube.conf
@@ -15,6 +15,14 @@
      log_level debug
     </source>
 
+    #Kubernetes Persistent Volume inventory
+    <source>
+     type kubepvinventory
+     tag oms.containerinsights.KubePVInventory
+     run_interval 60
+     log_level debug
+    </source>
+
     #Kubernetes events
     <source>
      type kubeevents
@@ -95,6 +103,21 @@
      max_retry_wait 5m
     </match>
 
+    <match oms.containerinsights.KubePVInventory**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 4m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 5s
+     max_retry_wait 5m
+    </match>
+
     <match oms.containerinsights.KubeEvents**>
      type out_oms
      log_level debug
diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index ca2538b79..ec42d5967 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -22,6 +22,7 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/microsoft/omsagent/plugin/filter_container.rb;			    source/plugins/ruby/filter_container.rb;				644; root; root
 
 /opt/microsoft/omsagent/plugin/in_kube_podinventory.rb;			source/plugins/ruby/in_kube_podinventory.rb;			644; root; root
+/opt/microsoft/omsagent/plugin/in_kube_pvinventory.rb;			source/plugins/ruby/in_kube_pvinventory.rb;			644; root; root
 /opt/microsoft/omsagent/plugin/in_kube_events.rb;			    source/plugins/ruby/in_kube_events.rb;				644; root; root
 /opt/microsoft/omsagent/plugin/KubernetesApiClient.rb;			source/plugins/ruby/KubernetesApiClient.rb;			644; root; root
 
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 296de02bf..26c7ae9a0 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -21,6 +21,7 @@ rules:
         "nodes/proxy",
         "namespaces",
         "services",
+        "persistentvolumes"
       ]
     verbs: ["list", "get", "watch"]
   - apiGroups: ["apps", "extensions", "autoscaling"]
@@ -66,6 +67,14 @@ data:
       log_level debug
      </source>
 
+     #Kubernetes Persistent Volume inventory
+     <source>
+      type kubepvinventory
+      tag oms.containerinsights.KubePVInventory
+      run_interval 60
+      log_level debug
+     </source>
+
      #Kubernetes events
      <source>
       type kubeevents
@@ -146,6 +155,21 @@ data:
       max_retry_wait 5m
      </match>
 
+     <match oms.containerinsights.KubePVInventory**>
+      type out_oms
+      log_level debug
+      num_threads 5
+      buffer_chunk_limit 4m
+      buffer_type file
+      buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer
+      buffer_queue_limit 20
+      buffer_queue_full_action drop_oldest_chunk
+      flush_interval 20s
+      retry_limit 10
+      retry_wait 5s
+      max_retry_wait 5m
+     </match>
+
      <match oms.containerinsights.KubeEvents**>
       type out_oms
       log_level debug
diff --git a/source/plugins/ruby/constants.rb b/source/plugins/ruby/constants.rb
index 079584c7b..cf41900dc 100644
--- a/source/plugins/ruby/constants.rb
+++ b/source/plugins/ruby/constants.rb
@@ -77,6 +77,9 @@ class Constants
   OMSAGENT_ZERO_FILL = "omsagent"
   KUBESYSTEM_NAMESPACE_ZERO_FILL = "kube-system"
   VOLUME_NAME_ZERO_FILL = "-"
+  PV_TYPES =["awsElasticBlockStore", "azureDisk", "azureFile", "cephfs", "cinder", "csi", "fc", "flexVolume",
+    "flocker", "gcePersistentDisk", "glusterfs", "hostPath", "iscsi", "local", "nfs",
+    "photonPersistentDisk", "portworxVolume", "quobyte", "rbd", "scaleIO", "storageos", "vsphereVolume"]
 
   #Telemetry constants
   CONTAINER_METRICS_HEART_BEAT_EVENT = "ContainerMetricsMdmHeartBeatEvent"
@@ -84,6 +87,7 @@ class Constants
   CONTAINER_RESOURCE_UTIL_HEART_BEAT_EVENT = "ContainerResourceUtilMdmHeartBeatEvent"
   PV_USAGE_HEART_BEAT_EVENT = "PVUsageMdmHeartBeatEvent"
   PV_KUBE_SYSTEM_METRICS_ENABLED_EVENT = "CollectPVKubeSystemMetricsEnabled"
+  PV_INVENTORY_HEART_BEAT_EVENT = "KubePVInventoryHeartBeatEvent"
   TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 10
   KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES = 15
   ZERO_FILL_METRICS_INTERVAL_IN_MINUTES = 30
diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb
new file mode 100644
index 000000000..b0e09c85b
--- /dev/null
+++ b/source/plugins/ruby/in_kube_pvinventory.rb
@@ -0,0 +1,253 @@
+module Fluent
+  class Kube_PVInventory_Input < Input
+    Plugin.register_input("kubepvinventory", self)
+
+    @@hostName = (OMS::Common.get_hostname)
+
+    def initialize
+      super
+      require "yaml"
+      require "yajl/json_gem"
+      require "yajl"
+      require "time"
+      require_relative "KubernetesApiClient"
+      require_relative "ApplicationInsightsUtility"
+      require_relative "oms_common"
+      require_relative "omslog"
+      require_relative "constants"
+
+      # Response size is around 1500 bytes per PV
+      @PV_CHUNK_SIZE = "5000"
+      @pvTypeToCountHash = {}
+    end
+
+    config_param :run_interval, :time, :default => 60
+    config_param :tag, :string, :default => "oms.containerinsights.KubePVInventory"
+
+    def configure(conf)
+      super
+    end
+
+    def start
+      if @run_interval
+        @finished = false
+        @condition = ConditionVariable.new
+        @mutex = Mutex.new
+        @thread = Thread.new(&method(:run_periodic))
+        @@pvTelemetryTimeTracker = DateTime.now.to_time.to_i
+      end
+    end
+
+    def shutdown
+      if @run_interval
+        @mutex.synchronize {
+          @finished = true
+          @condition.signal
+        }
+        @thread.join
+      end
+    end
+
+    def enumerate
+      begin
+        pvInventory = nil
+        telemetryFlush = false
+        @pvTypeToCountHash = {}
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
+
+        continuationToken = nil
+        $log.info("in_kube_pvinventory::enumerate : Getting PVs from Kube API @ #{Time.now.utc.iso8601}")
+        continuationToken, pvInventory = KubernetesApiClient.getResourcesAndContinuationToken("persistentvolumes?limit=#{@PV_CHUNK_SIZE}")
+        $log.info("in_kube_pvinventory::enumerate : Done getting PVs from Kube API @ #{Time.now.utc.iso8601}")
+
+        if (!pvInventory.nil? && !pvInventory.empty? && pvInventory.key?("items") && !pvInventory["items"].nil? && !pvInventory["items"].empty?)
+          parse_and_emit_records(pvInventory, batchTime)
+        else
+          $log.warn "in_kube_pvinventory::enumerate:Received empty pvInventory"
+        end
+
+        # If we receive a continuation token, make calls, process and flush data until we have processed all data
+        while (!continuationToken.nil? && !continuationToken.empty?)
+          continuationToken, pvInventory = KubernetesApiClient.getResourcesAndContinuationToken("persistentvolumes?limit=#{@PV_CHUNK_SIZE}&continue=#{continuationToken}")
+          if (!pvInventory.nil? && !pvInventory.empty? && pvInventory.key?("items") && !pvInventory["items"].nil? && !pvInventory["items"].empty?)
+            parse_and_emit_records(pvInventory, batchTime)
+          else
+            $log.warn "in_kube_pvinventory::enumerate:Received empty pvInventory"
+          end
+        end
+
+        # Setting this to nil so that we dont hold memory until GC kicks in
+        pvInventory = nil
+
+        # Send PV inventory telemetry once every TELEMETRY_FLUSH_INTERVAL_IN_MINUTES minutes
+        timeDifference = (DateTime.now.to_time.to_i - @@pvTelemetryTimeTracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
+          telemetryFlush = true
+        end
+        
+        # Flush AppInsights telemetry once all the processing is done
+        if telemetryFlush == true
+          telemetryProperties = {}
+          telemetryProperties["CountsOfPVTypes"] = @pvTypeToCountHash
+          ApplicationInsightsUtility.sendCustomEvent(Constants::PV_INVENTORY_HEART_BEAT_EVENT, telemetryProperties)
+          @@pvTelemetryTimeTracker = DateTime.now.to_time.to_i
+        end
+
+      rescue => errorStr
+        $log.warn "in_kube_pvinventory::enumerate:Failed in enumerate: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+    end # end enumerate
+
+    def parse_and_emit_records(pvInventory, batchTime = Time.now.utc.iso8601)
+      # Builds one record per item in pvInventory["items"], wraps each as KUBE_PV_INVENTORY_BLOB and emits them on @tag.
+      emitTime = Time.now.to_f
+      eventStream = MultiEventStream.new
+
+      begin
+        records = []
+        pvInventory["items"].each do |item|
+
+          # Node, pod, & usage info can be found by joining with pvUsedBytes metric using PVCNamespace/PVCName
+          record = {}
+          record["CollectionTime"] = batchTime
+          record["ClusterId"] = KubernetesApiClient.getClusterId
+          record["ClusterName"] = KubernetesApiClient.getClusterName
+          record["PVName"] = item["metadata"]["name"]
+          record["PVStatus"] = item["status"]["phase"]
+          record["PVAccessModes"] = item["spec"]["accessModes"].join(', ')
+          record["PVStorageClassName"] = item["spec"]["storageClassName"]
+          record["PVCapacityBytes"] = KubernetesApiClient.getMetricNumericValue("memory", item["spec"]["capacity"]["storage"])
+          record["PVCreationTimeStamp"] = item["metadata"]["creationTimestamp"]
+
+          # Optional values
+          pvcNamespace, pvcName = getPVCInfo(item)
+          type, typeInfo = getTypeInfo(item)
+          record["PVCNamespace"] = pvcNamespace
+          record["PVCName"] = pvcName
+          record["PVType"] = type
+          record["PVTypeInfo"] = typeInfo
+
+          records.push(record)
+
+          # Record telemetry
+          if type == nil
+            type = "empty"
+          end
+          if (@pvTypeToCountHash.has_key? type)
+            @pvTypeToCountHash[type] += 1
+          else
+            @pvTypeToCountHash[type] = 1
+          end
+        end
+
+        records.each do |record|
+          if !record.nil?
+            wrapper = {
+              "DataType" => "KUBE_PV_INVENTORY_BLOB",
+              "IPName" => "ContainerInsights",
+              "DataItems" => [record.each { |k, v| record[k] = v }],
+            }
+            eventStream.add(emitTime, wrapper) if wrapper
+          end
+        end
+
+        router.emit_stream(@tag, eventStream) if eventStream
+
+      rescue => errorStr
+        $log.warn "Failed in parse_and_emit_record for in_kube_pvinventory: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+    end
+
+    def getPVCInfo(item)
+      begin
+        if !item["spec"].nil? && !item["spec"]["claimRef"].nil?
+          claimRef = item["spec"]["claimRef"]
+          pvcNamespace = claimRef["namespace"]
+          pvcName = claimRef["name"]
+          return pvcNamespace, pvcName
+        end
+      rescue => errorStr
+        $log.warn "Failed in getPVCInfo for in_kube_pvinventory: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+
+      # No PVC or an error
+      return nil, nil
+    end
+
+    def getTypeInfo(item)
+      begin
+        if !item["spec"].nil?
+          (Constants::PV_TYPES).each do |pvType|
+      
+            # PV is this type
+            if !item["spec"][pvType].nil?
+
+              # Get additional info if azure disk/file
+              typeInfo = {}
+              if pvType == "azureDisk"
+                azureDisk = item["spec"]["azureDisk"]
+                typeInfo["DiskName"] = azureDisk["diskName"]
+                typeInfo["DiskUri"] = azureDisk["diskURI"]
+              elsif pvType == "azureFile"
+                typeInfo["FileShareName"] = item["spec"]["azureFile"]["shareName"]
+              end
+
+              # Can only have one type: return right away when found
+              return pvType, typeInfo
+
+            end
+          end
+        end
+      rescue => errorStr
+        $log.warn "Failed in getTypeInfo for in_kube_pvinventory: #{errorStr}"
+        $log.debug_backtrace(errorStr.backtrace)
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+
+      # No matches from list of types or an error
+      return nil, {}
+    end
+
+
+    def run_periodic
+      @mutex.lock
+      done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
+      until done
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
+        done = @finished
+        @mutex.unlock
+        if !done
+          begin
+            $log.info("in_kube_pvinventory::run_periodic.enumerate.start #{Time.now.utc.iso8601}")
+            enumerate
+            $log.info("in_kube_pvinventory::run_periodic.enumerate.end #{Time.now.utc.iso8601}")
+          rescue => errorStr
+            $log.warn "in_kube_pvinventory::run_periodic: enumerate Failed to retrieve pv inventory: #{errorStr}"
+            ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+          end
+        end
+        @mutex.lock
+      end
+      @mutex.unlock
+    end
+
+  end # Kube_PVInventory_Input
+end # module
\ No newline at end of file

From 24b709f9e3c3b18779102b491fc98b87a99d1335 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Tue, 15 Dec 2020 09:42:52 -0800
Subject: [PATCH 47/60] Gangams/fix for build release pipeline issue (#476)

* use isolated cdpx acr

* correct comment
---
 .pipelines/get-aad-app-creds-from-kv.sh           | 14 ++++++++++++++
 ...ll-from-cdpx-and-push-to-ci-acr-linux-image.sh | 15 ++++++++++++---
 ...-from-cdpx-and-push-to-ci-acr-windows-image.sh | 14 +++++++++++---
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/.pipelines/get-aad-app-creds-from-kv.sh b/.pipelines/get-aad-app-creds-from-kv.sh
index 8ef56cddb..a0ba464cc 100755
--- a/.pipelines/get-aad-app-creds-from-kv.sh
+++ b/.pipelines/get-aad-app-creds-from-kv.sh
@@ -11,6 +11,8 @@ do
            KV) KV=$VALUE ;;
            KVSECRETNAMEAPPID) AppId=$VALUE ;;
            KVSECRETNAMEAPPSECRET) AppSecret=$VALUE ;;
+           KVSECRETNAMECDPXAPPID) CdpxAppId=$VALUE ;;
+           KVSECRETNAMECDPXAPPSECRET) CdpxAppSecret=$VALUE ;;
            *)
     esac
 done
@@ -27,4 +29,16 @@ az keyvault secret download --file ~/acrappsecret --vault-name ${KV}  --name ${A
 
 echo "downloaded the appsecret from KV:${KV} and KV secret:${AppSecret}"
 
+echo "key vault secret name for cdpx appid:${CdpxAppId}"
+
+echo "key vault secret name for cdpx appsecret:${CdpxAppSecret}"
+
+az keyvault secret download --file ~/cdpxacrappid --vault-name "${KV}"  --name "${CdpxAppId}"
+
+echo "downloaded the appid from KV:${KV} and KV secret:${CdpxAppId}"
+
+az keyvault secret download --file ~/cdpxacrappsecret --vault-name "${KV}"  --name "${CdpxAppSecret}"
+
+echo "downloaded the appsecret from KV:${KV} and KV secret:${CdpxAppSecret}"
+
 echo "end: get app id and secret from specified key vault"
diff --git a/.pipelines/pull-from-cdpx-and-push-to-ci-acr-linux-image.sh b/.pipelines/pull-from-cdpx-and-push-to-ci-acr-linux-image.sh
index 638d3a937..3844ea185 100755
--- a/.pipelines/pull-from-cdpx-and-push-to-ci-acr-linux-image.sh
+++ b/.pipelines/pull-from-cdpx-and-push-to-ci-acr-linux-image.sh
@@ -25,12 +25,21 @@ ACR_APP_ID=$(cat ~/acrappid)
 ACR_APP_SECRET=$(cat ~/acrappsecret)
 echo "end: read appid and appsecret"
 
+echo "start: read appid and appsecret for cdpx"
+CDPX_ACR_APP_ID=$(cat ~/cdpxacrappid)
+CDPX_ACR_APP_SECRET=$(cat ~/cdpxacrappsecret)
+echo "end: read appid and appsecret which has read access on cdpx acr"
+
+
+# CDPX_ACR naming convention: 'cdpx' + service tree id without '-' + a two-digit suffix such as '00' or '01'.
+# Suffix '00' is the primary registry and '01' the secondary; only the primary is used.
+# This is configured via a pipeline variable.
 echo "login to cdpxlinux acr:${CDPX_ACR}"
-docker login $CDPX_ACR  --username $ACR_APP_ID --password $ACR_APP_SECRET
+docker login $CDPX_ACR  --username $CDPX_ACR_APP_ID --password $CDPX_ACR_APP_SECRET
 echo "login to cdpxlinux acr completed: ${CDPX_ACR}"
 
 echo "pull agent image from cdpxlinux acr: ${CDPX_ACR}"
-docker pull ${CDPX_ACR}/artifact/3170cdd2-19f0-4027-912b-1027311691a2/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG}
+docker pull ${CDPX_ACR}/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG}
 echo "pull image from cdpxlinux acr completed: ${CDPX_ACR}"
 
 echo "CI Release name is:"$CI_RELEASE
@@ -41,7 +50,7 @@ echo "CI ACR : ${CI_ACR}"
 echo "CI AGENT REPOSITORY NAME : ${CI_AGENT_REPO}"
 
 echo "tag linux agent image"
-docker tag ${CDPX_ACR}/artifact/3170cdd2-19f0-4027-912b-1027311691a2/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG} ${CI_ACR}/public/azuremonitor/containerinsights/${CI_AGENT_REPO}:${imagetag}
+docker tag ${CDPX_ACR}/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG} ${CI_ACR}/public/azuremonitor/containerinsights/${CI_AGENT_REPO}:${imagetag}
 
 echo "login ciprod acr":$CI_ACR
 docker login $CI_ACR --username $ACR_APP_ID --password $ACR_APP_SECRET
diff --git a/.pipelines/pull-from-cdpx-and-push-to-ci-acr-windows-image.sh b/.pipelines/pull-from-cdpx-and-push-to-ci-acr-windows-image.sh
index 066410af5..095a00039 100755
--- a/.pipelines/pull-from-cdpx-and-push-to-ci-acr-windows-image.sh
+++ b/.pipelines/pull-from-cdpx-and-push-to-ci-acr-windows-image.sh
@@ -25,12 +25,20 @@ ACR_APP_ID=$(cat ~/acrappid )
 ACR_APP_SECRET=$(cat ~/acrappsecret)
 echo "end: read appid and appsecret"
 
+echo "start: read appid and appsecret for cdpx"
+CDPX_ACR_APP_ID=$(cat ~/cdpxacrappid)
+CDPX_ACR_APP_SECRET=$(cat ~/cdpxacrappsecret)
+echo "end: read appid and appsecret which has read access on cdpx acr"
+
+# CDPX_ACR naming convention: 'cdpx' + service tree id without '-' + two-digit suffix such as '00'/'01'
+# suffix 00 is the primary and 01 the secondary; we only use the primary
+# This is configured via a pipeline variable
 echo "login to cdpxwindows acr:${CDPX_ACR}"
-docker login $CDPX_ACR  --username $ACR_APP_ID --password $ACR_APP_SECRET
+printf '%s\n' "$CDPX_ACR_APP_SECRET" | docker login "$CDPX_ACR" --username "$CDPX_ACR_APP_ID" --password-stdin  # quoted; secret via stdin, not argv
 echo "login to cdpxwindows acr:${CDPX_ACR} completed"
 
 echo "pull image from cdpxwin acr: ${CDPX_ACR}"
-docker pull ${CDPX_ACR}/artifact/3170cdd2-19f0-4027-912b-1027311691a2/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG}
+docker pull ${CDPX_ACR}/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG}
 echo "pull image from cdpxwin acr completed: ${CDPX_ACR}"
 
 echo "CI Release name:"$CI_RELEASE
@@ -40,7 +48,7 @@ imagetag="win-"$CI_RELEASE$CI_IMAGE_TAG_SUFFIX
 echo "agentimagetag="$imagetag
 
 echo "tag windows agent image"
-docker tag ${CDPX_ACR}/artifact/3170cdd2-19f0-4027-912b-1027311691a2/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG} ${CI_ACR}/public/azuremonitor/containerinsights/${CI_AGENT_REPO}:${imagetag}
+docker tag ${CDPX_ACR}/official/${CDPX_REPO_NAME}:${CDPX_AGENT_IMAGE_TAG} ${CI_ACR}/public/azuremonitor/containerinsights/${CI_AGENT_REPO}:${imagetag}
 
 echo "login to ${CI_ACR} acr"
 docker login $CI_ACR --username $ACR_APP_ID --password $ACR_APP_SECRET

From 9061201be9b7578057479abf6e612a05ca412778 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Tue, 15 Dec 2020 12:26:25 -0800
Subject: [PATCH 48/60] add pv fluentd plugin config to helm rs config (#477)

* add pv fluentd plugin to helm rs config

* helm rbac permissions for pv api calls
---
 .../templates/omsagent-rbac.yaml              |  2 +-
 .../templates/omsagent-rs-configmap.yaml      | 23 +++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/charts/azuremonitor-containers/templates/omsagent-rbac.yaml b/charts/azuremonitor-containers/templates/omsagent-rbac.yaml
index 4f7408e7c..bd4e9baf3 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rbac.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rbac.yaml
@@ -19,7 +19,7 @@ metadata:
     heritage: {{ .Release.Service }}
 rules:
 - apiGroups: [""]
-  resources: ["pods", "events", "nodes", "nodes/stats", "nodes/metrics", "nodes/spec", "nodes/proxy", "namespaces", "services"]
+  resources: ["pods", "events", "nodes", "nodes/stats", "nodes/metrics", "nodes/spec", "nodes/proxy", "namespaces", "services", "persistentvolumes"]
   verbs: ["list", "get", "watch"]
 - apiGroups: ["apps", "extensions", "autoscaling"]
   resources: ["replicasets", "deployments", "horizontalpodautoscalers"]
diff --git a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
index e1bc969cb..baeedf1be 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
@@ -20,6 +20,14 @@ data:
       log_level debug
      </source>
 
+     #Kubernetes Persistent Volume inventory
+     <source>
+      type kubepvinventory
+      tag oms.containerinsights.KubePVInventory
+      run_interval 60
+      log_level debug
+     </source>
+
      #Kubernetes events
      <source>
       type kubeevents
@@ -99,6 +107,21 @@ data:
       max_retry_wait 5m
      </match>
 
+     <match oms.containerinsights.KubePVInventory**>
+     type out_oms
+     log_level debug
+     num_threads 5
+     buffer_chunk_limit 4m
+     buffer_type file
+     buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer
+     buffer_queue_limit 20
+     buffer_queue_full_action drop_oldest_chunk
+     flush_interval 20s
+     retry_limit 10
+     retry_wait 5s
+     max_retry_wait 5m
+    </match>
+
      <match oms.containerinsights.KubeEvents**>
       type out_oms
       log_level debug

From 064bc068f70bacec13af02f6ab74180186a98356 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 16 Dec 2020 15:22:13 -0800
Subject: [PATCH 49/60] Gangams/fix rs ooming (#473)

* optimize kpi

* optimize kube node inventory

* add flags for events, deployments and hpa

* have separate function parseNodeLimits

* refactor code

* fix crash

* fix bug with service name

* fix bugs related to get service name

* update oom fix test agent

* debug logs

* fix service label issue

* update to latest agent and enable ephemeral annotation

* change stream size to 200 from 250

* update yaml

* adjust chunksizes

* add ruby gc env

* yaml changes for cioomtest11282020-3

* telemetry to track pods latency

* service count telemetry

* rename variables

* wip

* nodes inventory telemetry

* configmap changes

* add emit streams in configmap

* yaml updates

* fix copy and paste bug

* add todo comments

* fix node latency telemetry bug

* update yaml with latest test image

* fix bug

* upping rs memory change

* fix mdm bug with final emit stream

* update to latest image

* fix pr feedback

* fix pr feedback

* rename health config to agent config

* fix max allowed hpa chunk size

* update to use 1k pod chunk since validated on 1.18+

* remove debug logs

* minor updates

* move defaults to common place

* chart updates

* final oomfix agent

* update to use prod image so that can be validated with build pipeline

* fix typo in comment
---
 .../installer/datafiles/base_container.data   |   2 +-
 .../scripts/tomlparser-agent-config.rb        | 172 +++++
 .../scripts/tomlparser-health-config.rb       |  73 --
 .../templates/omsagent-rs-configmap.yaml      |  32 +-
 charts/azuremonitor-containers/values.yaml    |   9 +
 kubernetes/linux/Dockerfile                   |   1 +
 kubernetes/linux/main.sh                      |  16 +-
 kubernetes/omsagent.yaml                      |  18 +-
 source/plugins/ruby/KubernetesApiClient.rb    | 387 +++++-----
 source/plugins/ruby/in_kube_events.rb         |  18 +-
 source/plugins/ruby/in_kube_nodes.rb          | 410 ++++++----
 source/plugins/ruby/in_kube_podinventory.rb   | 717 ++++++++++--------
 .../plugins/ruby/in_kubestate_deployments.rb  | 424 ++++++-----
 source/plugins/ruby/in_kubestate_hpa.rb       | 421 +++++-----
 14 files changed, 1534 insertions(+), 1166 deletions(-)
 create mode 100644 build/linux/installer/scripts/tomlparser-agent-config.rb
 delete mode 100644 build/linux/installer/scripts/tomlparser-health-config.rb

diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index ec42d5967..c680f0eea 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -123,7 +123,7 @@ MAINTAINER:              'Microsoft Corporation'
 /opt/tomlparser-mdm-metrics-config.rb;                          build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb;     755; root; root
 /opt/tomlparser-metric-collection-config.rb;                    build/linux/installer/scripts/tomlparser-metric-collection-config.rb;     755; root; root
 
-/opt/tomlparser-health-config.rb;                               build/linux/installer/scripts/tomlparser-health-config.rb;     755; root; root
+/opt/tomlparser-agent-config.rb;                                build/linux/installer/scripts/tomlparser-agent-config.rb;     755; root; root
 /opt/tomlparser.rb;                                             build/common/installer/scripts/tomlparser.rb;     755; root; root
 /opt/td-agent-bit-conf-customizer.rb;                           build/common/installer/scripts/td-agent-bit-conf-customizer.rb;     755; root; root
 /opt/ConfigParseErrorLogger.rb;                                 build/common/installer/scripts/ConfigParseErrorLogger.rb;           755; root; root
diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb
new file mode 100644
index 000000000..87c5194ed
--- /dev/null
+++ b/build/linux/installer/scripts/tomlparser-agent-config.rb
@@ -0,0 +1,172 @@
+#!/usr/local/bin/ruby
+
+#this should be require relative in Linux and require in windows, since it is a gem install on windows
+@os_type = ENV["OS_TYPE"]
+if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+  require "tomlrb"
+else
+  require_relative "tomlrb"
+end
+
+require_relative "ConfigParseErrorLogger"
+
+@configMapMountPath = "/etc/config/settings/agent-settings"
+@configSchemaVersion = ""
+@enable_health_model = false
+
+# 250 Node items (15KB per node) account to approximately 4MB
+@nodesChunkSize = 250
+# 1000 pods (10KB per pod) account to approximately 10MB
+@podsChunkSize = 1000
+# 4000 events (1KB per event) account to approximately 4MB
+@eventsChunkSize = 4000
+# roughly each deployment is 8k
+# 500 deployments account to approximately 4MB
+@deploymentsChunkSize = 500
+# roughly each HPA is 3k
+# 2000 HPAs account to approximately 6-7MB
+@hpaChunkSize = 2000
+# stream batch sizes to avoid large file writes
+# too low will consume higher disk iops
+@podsEmitStreamBatchSize = 200
+@nodesEmitStreamBatchSize = 100
+
+# a higher chunk size means higher rs pod memory consumption but lower api latency
+# similarly, a lower value helps memory consumption but incurs additional round-trip latency
+# these need to be tuned based on the workload
+# nodes
+@nodesChunkSizeMin = 100
+@nodesChunkSizeMax = 400
+# pods
+@podsChunkSizeMin = 250
+@podsChunkSizeMax = 1500
+# events
+@eventsChunkSizeMin = 2000
+@eventsChunkSizeMax = 10000
+# deployments
+@deploymentsChunkSizeMin = 500
+@deploymentsChunkSizeMax = 1000
+# hpa
+@hpaChunkSizeMin = 500
+@hpaChunkSizeMax = 2000
+
+# emit stream sizes to prevent lower values which costs disk i/o
+# max will be upto the chunk size
+@podsEmitStreamBatchSizeMin = 50
+@nodesEmitStreamBatchSizeMin = 50
+
+def is_number?(value) # returns true when Integer(value) succeeds, false when it raises
+  true if Integer(value) rescue false
+end
+
+# Parse the mounted agent-settings config map (TOML) into a ruby hash with symbol keys; returns nil if it is not mounted or fails to parse
+def parseConfigMap
+  begin
+    # Check to see if config map is created
+    if (File.file?(@configMapMountPath))
+      puts "config::configmap container-azm-ms-agentconfig for agent settings mounted, parsing values"
+      parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
+      puts "config::Successfully parsed mounted config map"
+      return parsedConfig
+    else
+      puts "config::configmap container-azm-ms-agentconfig for agent settings not mounted, using defaults"
+      return nil
+    end
+  rescue => errorStr
+    ConfigParseErrorLogger.logError("Exception while parsing config map for agent settings : #{errorStr}, using defaults, please check config map for errors")
+    return nil
+  end
+end
+
+# Apply values from the parsed config map (health model flag, chunk sizes, emit stream batch sizes); non-numeric or out-of-range values are ignored and the defaults kept
+def populateSettingValuesFromConfigMap(parsedConfig)
+  begin
+    if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil?
+      if !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil?
+        @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled]
+        puts "enable_health_model = #{@enable_health_model}"
+      end
+      chunk_config = parsedConfig[:agent_settings][:chunk_config] # optional agent_settings.chunk_config table
+      if !chunk_config.nil?
+        nodesChunkSize = chunk_config[:NODES_CHUNK_SIZE]
+        if !nodesChunkSize.nil? && is_number?(nodesChunkSize) && (@nodesChunkSizeMin..@nodesChunkSizeMax) === nodesChunkSize.to_i
+          @nodesChunkSize = nodesChunkSize.to_i
+          puts "Using config map value: NODES_CHUNK_SIZE = #{@nodesChunkSize}"
+        end
+
+        podsChunkSize = chunk_config[:PODS_CHUNK_SIZE]
+        if !podsChunkSize.nil? && is_number?(podsChunkSize) && (@podsChunkSizeMin..@podsChunkSizeMax) === podsChunkSize.to_i
+          @podsChunkSize = podsChunkSize.to_i
+          puts "Using config map value: PODS_CHUNK_SIZE = #{@podsChunkSize}"
+        end
+
+        eventsChunkSize = chunk_config[:EVENTS_CHUNK_SIZE]
+        if !eventsChunkSize.nil? && is_number?(eventsChunkSize) && (@eventsChunkSizeMin..@eventsChunkSizeMax) === eventsChunkSize.to_i
+          @eventsChunkSize = eventsChunkSize.to_i
+          puts "Using config map value: EVENTS_CHUNK_SIZE = #{@eventsChunkSize}"
+        end
+
+        deploymentsChunkSize = chunk_config[:DEPLOYMENTS_CHUNK_SIZE]
+        if !deploymentsChunkSize.nil? && is_number?(deploymentsChunkSize) && (@deploymentsChunkSizeMin..@deploymentsChunkSizeMax) === deploymentsChunkSize.to_i
+          @deploymentsChunkSize = deploymentsChunkSize.to_i
+          puts "Using config map value: DEPLOYMENTS_CHUNK_SIZE = #{@deploymentsChunkSize}"
+        end
+
+        hpaChunkSize = chunk_config[:HPA_CHUNK_SIZE]
+        if !hpaChunkSize.nil? && is_number?(hpaChunkSize) && (@hpaChunkSizeMin..@hpaChunkSizeMax) === hpaChunkSize.to_i
+          @hpaChunkSize = hpaChunkSize.to_i
+          puts "Using config map value: HPA_CHUNK_SIZE = #{@hpaChunkSize}"
+        end
+
+        podsEmitStreamBatchSize = chunk_config[:PODS_EMIT_STREAM_BATCH_SIZE] # upper bound is the effective @podsChunkSize (possibly overridden above)
+        if !podsEmitStreamBatchSize.nil? && is_number?(podsEmitStreamBatchSize) &&
+           podsEmitStreamBatchSize.to_i <= @podsChunkSize && podsEmitStreamBatchSize.to_i >= @podsEmitStreamBatchSizeMin
+          @podsEmitStreamBatchSize = podsEmitStreamBatchSize.to_i
+          puts "Using config map value: PODS_EMIT_STREAM_BATCH_SIZE = #{@podsEmitStreamBatchSize}"
+        end
+        nodesEmitStreamBatchSize = chunk_config[:NODES_EMIT_STREAM_BATCH_SIZE] # upper bound is the effective @nodesChunkSize (possibly overridden above)
+        if !nodesEmitStreamBatchSize.nil? && is_number?(nodesEmitStreamBatchSize) &&
+           nodesEmitStreamBatchSize.to_i <= @nodesChunkSize && nodesEmitStreamBatchSize.to_i >= @nodesEmitStreamBatchSizeMin
+          @nodesEmitStreamBatchSize = nodesEmitStreamBatchSize.to_i
+          puts "Using config map value: NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}"
+        end
+      end
+    end
+  rescue => errorStr
+    puts "config::error:Exception while reading config settings for agent configuration setting - #{errorStr}, using defaults"
+    @enable_health_model = false # only the health flag is reset here; sizes already accepted above are kept
+  end
+end
+
+@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+puts "****************Start Config Processing********************"
+if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
+  configMapSettings = parseConfigMap
+  if !configMapSettings.nil?
+    populateSettingValuesFromConfigMap(configMapSettings)
+  end
+else
+  if (File.file?(@configMapMountPath))
+    ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
+  end
+  @enable_health_model = false
+end
+
+# Write the settings to agent_config_env_var, so that they can be set as environment variables (main.sh sources this file)
+file = File.open("agent_config_env_var", "w")
+
+if !file.nil?
+  file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n")
+  file.write("export NODES_CHUNK_SIZE=#{@nodesChunkSize}\n")
+  file.write("export PODS_CHUNK_SIZE=#{@podsChunkSize}\n")
+  file.write("export EVENTS_CHUNK_SIZE=#{@eventsChunkSize}\n")
+  file.write("export DEPLOYMENTS_CHUNK_SIZE=#{@deploymentsChunkSize}\n")
+  file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n")
+  file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n")
+  file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n")
+  # Close file after writing all environment variables
+  file.close
+else
+  puts "Exception while opening file for writing config environment variables"
+end
+puts "****************End Config Processing********************" # printed on every path so the Start marker is always paired
diff --git a/build/linux/installer/scripts/tomlparser-health-config.rb b/build/linux/installer/scripts/tomlparser-health-config.rb
deleted file mode 100644
index 14c8bdb44..000000000
--- a/build/linux/installer/scripts/tomlparser-health-config.rb
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/local/bin/ruby
-
-#this should be require relative in Linux and require in windows, since it is a gem install on windows
-@os_type = ENV["OS_TYPE"]
-if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
-  require "tomlrb"
-else
-  require_relative "tomlrb"
-end
-
-require_relative "ConfigParseErrorLogger"
-
-@configMapMountPath = "/etc/config/settings/agent-settings"
-@configSchemaVersion = ""
-@enable_health_model = false
-
-# Use parser to parse the configmap toml file to a ruby structure
-def parseConfigMap
-  begin
-    # Check to see if config map is created
-    if (File.file?(@configMapMountPath))
-      puts "config::configmap container-azm-ms-agentconfig for agent health settings mounted, parsing values"
-      parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
-      puts "config::Successfully parsed mounted config map"
-      return parsedConfig
-    else
-      puts "config::configmap container-azm-ms-agentconfig for agent health settings not mounted, using defaults"
-      return nil
-    end
-  rescue => errorStr
-    ConfigParseErrorLogger.logError("Exception while parsing config map for enabling health: #{errorStr}, using defaults, please check config map for errors")
-    return nil
-  end
-end
-
-# Use the ruby structure created after config parsing to set the right values to be used as environment variables
-def populateSettingValuesFromConfigMap(parsedConfig)
-  begin
-    if !parsedConfig.nil? && !parsedConfig[:agent_settings].nil? && !parsedConfig[:agent_settings][:health_model].nil? && !parsedConfig[:agent_settings][:health_model][:enabled].nil?
-        @enable_health_model = parsedConfig[:agent_settings][:health_model][:enabled]
-        puts "enable_health_model = #{@enable_health_model}"
-    end
-  rescue => errorStr
-    puts "config::error:Exception while reading config settings for health_model enabled setting - #{errorStr}, using defaults"
-    @enable_health_model = false
-  end
-end
-
-@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
-puts "****************Start Config Processing********************"
-if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version , so hardcoding it
-  configMapSettings = parseConfigMap
-  if !configMapSettings.nil?
-    populateSettingValuesFromConfigMap(configMapSettings)
-  end
-else
-  if (File.file?(@configMapMountPath))
-    ConfigParseErrorLogger.logError("config::unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
-  end
-  @enable_health_model = false
-end
-
-# Write the settings to file, so that they can be set as environment variables
-file = File.open("health_config_env_var", "w")
-
-if !file.nil?
-  file.write("export AZMON_CLUSTER_ENABLE_HEALTH_MODEL=#{@enable_health_model}\n")
-  # Close file after writing all environment variables
-  file.close
-else
-  puts "Exception while opening file for writing config environment variables"
-  puts "****************End Config Processing********************"
-end
\ No newline at end of file
diff --git a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
index baeedf1be..fc7c471f8 100644
--- a/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-rs-configmap.yaml
@@ -95,7 +95,7 @@ data:
      <match oms.containerinsights.KubePodInventory**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
@@ -108,24 +108,24 @@ data:
      </match>
 
      <match oms.containerinsights.KubePVInventory**>
-     type out_oms
-     log_level debug
-     num_threads 5
-     buffer_chunk_limit 4m
-     buffer_type file
-     buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer
-     buffer_queue_limit 20
-     buffer_queue_full_action drop_oldest_chunk
-     flush_interval 20s
-     retry_limit 10
-     retry_wait 5s
-     max_retry_wait 5m
-    </match>
+      type out_oms
+      log_level debug
+      num_threads 5
+      buffer_chunk_limit 4m
+      buffer_type file
+      buffer_path %STATE_DIR_WS%/state/out_oms_kubepv*.buffer
+      buffer_queue_limit 20
+      buffer_queue_full_action drop_oldest_chunk
+      flush_interval 20s
+      retry_limit 10
+      retry_wait 5s
+      max_retry_wait 5m
+     </match>
 
      <match oms.containerinsights.KubeEvents**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
@@ -155,7 +155,7 @@ data:
      <match oms.containerinsights.KubeNodeInventory**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
@@ -184,7 +184,7 @@ data:
      <match oms.api.KubePerf**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index e8acda20e..907e315d1 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -81,6 +81,15 @@ omsagent:
   deployment:
     affinity:
       nodeAffinity:
+        # affinity to schedule on to ephemeral os node if its available
+        preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 1
+            preference:
+              matchExpressions:
+              - key: storageprofile
+                operator: NotIn
+                values:
+                - managed
         requiredDuringSchedulingIgnoredDuringExecution:
           nodeSelectorTerms:
             - labelSelector:
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index d04e86128..34ab133da 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -15,6 +15,7 @@ ENV HOST_VAR /hostfs/var
 ENV AZMON_COLLECT_ENV False
 ENV KUBE_CLIENT_BACKOFF_BASE 1
 ENV KUBE_CLIENT_BACKOFF_DURATION 0
+ENV RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR 0.9
 RUN /usr/bin/apt-get update && /usr/bin/apt-get install -y libc-bin wget openssl curl sudo python-ctypes init-system-helpers  net-tools rsyslog cron vim dmidecode apt-transport-https gnupg && rm -rf /var/lib/apt/lists/*
 COPY setup.sh main.sh defaultpromenvvariables defaultpromenvvariables-rs mdsd.xml envmdsd $tmpdir/
 WORKDIR ${tmpdir}
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index a2ba6a1d1..ed16d3e32 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -171,14 +171,14 @@ done
 source config_env_var
 
 
-#Parse the configmap to set the right environment variables for health feature.
-/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-health-config.rb
+#Parse the configmap to set the right environment variables for agent config.
+/opt/microsoft/omsagent/ruby/bin/ruby tomlparser-agent-config.rb
 
-cat health_config_env_var | while read line; do
+cat agent_config_env_var | while read line; do
     #echo $line
     echo $line >> ~/.bashrc
 done
-source health_config_env_var
+source agent_config_env_var
 
 #Parse the configmap to set the right environment variables for network policy manager (npm) integration.
 /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-npm-config.rb
@@ -429,7 +429,7 @@ echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc
 
 #region check to auto-activate oneagent, to route container logs,
 #Intent is to activate one agent routing for all managed clusters with region in the regionllist, unless overridden by configmap
-# AZMON_CONTAINER_LOGS_ROUTE  will have route (if any) specified in the config map 
+# AZMON_CONTAINER_LOGS_ROUTE  will have route (if any) specified in the config map
 # AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE will have the final route that we compute & set, based on our region list logic
 echo "************start oneagent log routing checks************"
 # by default, use configmap route for safer side
@@ -462,9 +462,9 @@ else
   echo "current region is not in oneagent regions..."
 fi
 
-if [ "$isoneagentregion" = true ]; then 
+if [ "$isoneagentregion" = true ]; then
    #if configmap has a routing for logs, but current region is in the oneagent region list, take the configmap route
-   if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then   
+   if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then
       AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE
       echo "oneagent region is true for current region:$currentregion and config map logs route is not empty. so using config map logs route as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
    else #there is no configmap route, so route thru oneagent
@@ -511,7 +511,7 @@ if [ ! -e "/etc/config/kube.conf" ]; then
 
             echo "starting mdsd ..."
             mdsd -l -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
-            
+
             touch /opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2
       fi
    fi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 26c7ae9a0..013e2a6c0 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -143,7 +143,7 @@ data:
      <match oms.containerinsights.KubePodInventory**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
@@ -173,7 +173,7 @@ data:
      <match oms.containerinsights.KubeEvents**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
@@ -203,7 +203,7 @@ data:
      <match oms.containerinsights.KubeNodeInventory**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
@@ -232,7 +232,7 @@ data:
      <match oms.api.KubePerf**>
       type out_oms
       log_level debug
-      num_threads 5
+      num_threads 2
       buffer_chunk_limit 4m
       buffer_type file
       buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
@@ -533,7 +533,6 @@ spec:
               cpu: 150m
               memory: 250Mi
           env:
-            # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these
             - name: AKS_RESOURCE_ID
               value: "VALUE_AKS_RESOURCE_ID_VALUE"
             - name: AKS_REGION
@@ -588,6 +587,15 @@ spec:
             periodSeconds: 60
       affinity:
         nodeAffinity:
+          # affinity to schedule on to ephemeral os node if its available
+          preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 1
+            preference:
+              matchExpressions:
+              - key: storageprofile
+                operator: NotIn
+                values:
+                - managed
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
               - labelSelector:
diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb
index 073eb0417..aca2142a0 100644
--- a/source/plugins/ruby/KubernetesApiClient.rb
+++ b/source/plugins/ruby/KubernetesApiClient.rb
@@ -172,6 +172,10 @@ def isAROV3Cluster
       return @@IsAROV3Cluster
     end
 
+    def isAROv3MasterOrInfraPod(nodeName)
+      return isAROV3Cluster() && (!nodeName.nil? && (nodeName.downcase.start_with?("infra-") || nodeName.downcase.start_with?("master-")))
+    end
+
     def isNodeMaster
       return @@IsNodeMaster if !@@IsNodeMaster.nil?
       @@IsNodeMaster = false
@@ -276,7 +280,8 @@ def getPods(namespace)
     def getWindowsNodes
       winNodes = []
       begin
-        resourceUri = getNodesResourceUri("nodes")
+        # get only windows nodes
+        resourceUri = getNodesResourceUri("nodes?labelSelector=kubernetes.io%2Fos%3Dwindows")
         nodeInventory = JSON.parse(getKubeResourceInfo(resourceUri).body)
         @Log.info "KubernetesAPIClient::getWindowsNodes : Got nodes from kube api"
         # Resetting the windows node cache
@@ -396,42 +401,67 @@ def getPodUid(podNameSpace, podMetadata)
       return podUid
     end
 
-    def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
+    def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
       metricItems = []
       begin
         clusterId = getClusterId
-        metricInfo = metricJSON
-        metricInfo["items"].each do |pod|
-          podNameSpace = pod["metadata"]["namespace"]
-          podUid = getPodUid(podNameSpace, pod["metadata"])
-          if podUid.nil?
-            next
-          end
-
-          # For ARO, skip the pods scheduled on to master or infra nodes to ingest
-          if isAROV3Cluster() && !pod["spec"].nil? && !pod["spec"]["nodeName"].nil? &&
-             (pod["spec"]["nodeName"].downcase.start_with?("infra-") ||
-              pod["spec"]["nodeName"].downcase.start_with?("master-"))
-            next
-          end
+        podNameSpace = pod["metadata"]["namespace"]
+        podUid = getPodUid(podNameSpace, pod["metadata"])
+        if podUid.nil?
+          return metricItems
+        end
 
-          podContainers = []
-          if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty?
-            podContainers = podContainers + pod["spec"]["containers"]
-          end
-          # Adding init containers to the record list as well.
-          if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty?
-            podContainers = podContainers + pod["spec"]["initContainers"]
-          end
+        nodeName = ""
+        #for unscheduled (non-started) pods nodeName does NOT exist
+        if !pod["spec"]["nodeName"].nil?
+          nodeName = pod["spec"]["nodeName"]
+        end
+        # For ARO, skip the pods scheduled on to master or infra nodes to ingest
+        if isAROv3MasterOrInfraPod(nodeName)
+          return metricItems
+        end
 
-          if (!podContainers.nil? && !podContainers.empty? && !pod["spec"]["nodeName"].nil?)
-            nodeName = pod["spec"]["nodeName"]
-            podContainers.each do |container|
-              containerName = container["name"]
-              #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
-              if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?)
-                metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect])
+        podContainers = []
+        if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty?
+          podContainers = podContainers + pod["spec"]["containers"]
+        end
+        # Adding init containers to the record list as well.
+        if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty?
+          podContainers = podContainers + pod["spec"]["initContainers"]
+        end
 
+        if (!podContainers.nil? && !podContainers.empty? && !pod["spec"]["nodeName"].nil?)
+          podContainers.each do |container|
+            containerName = container["name"]
+            #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+            if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?)
+              metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect])
+
+              metricItem = {}
+              metricItem["DataItems"] = []
+
+              metricProps = {}
+              metricProps["Timestamp"] = metricTime
+              metricProps["Host"] = nodeName
+              # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent
+              metricProps["Computer"] = nodeName
+              metricProps["ObjectName"] = "K8SContainer"
+              metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
+
+              metricProps["Collections"] = []
+              metricCollections = {}
+              metricCollections["CounterName"] = metricNametoReturn
+              metricCollections["Value"] = metricValue
+
+              metricProps["Collections"].push(metricCollections)
+              metricItem["DataItems"].push(metricProps)
+              metricItems.push(metricItem)
+              #No container level limit for the given metric, so default to node level limit
+            else
+              nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect
+              if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey))
+                metricValue = @@NodeMetrics[nodeMetricsHashKey]
+                #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ")
                 metricItem = {}
                 metricItem["DataItems"] = []
 
@@ -451,32 +481,6 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName
                 metricProps["Collections"].push(metricCollections)
                 metricItem["DataItems"].push(metricProps)
                 metricItems.push(metricItem)
-                #No container level limit for the given metric, so default to node level limit
-              else
-                nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect
-                if (metricCategory == "limits" && @@NodeMetrics.has_key?(nodeMetricsHashKey))
-                  metricValue = @@NodeMetrics[nodeMetricsHashKey]
-                  #@Log.info("Limits not set for container #{clusterId + "/" + podUid + "/" + containerName} using node level limits: #{nodeMetricsHashKey}=#{metricValue} ")
-                  metricItem = {}
-                  metricItem["DataItems"] = []
-
-                  metricProps = {}
-                  metricProps["Timestamp"] = metricTime
-                  metricProps["Host"] = nodeName
-                  # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent
-                  metricProps["Computer"] = nodeName
-                  metricProps["ObjectName"] = "K8SContainer"
-                  metricProps["InstanceName"] = clusterId + "/" + podUid + "/" + containerName
-
-                  metricProps["Collections"] = []
-                  metricCollections = {}
-                  metricCollections["CounterName"] = metricNametoReturn
-                  metricCollections["Value"] = metricValue
-
-                  metricProps["Collections"].push(metricCollections)
-                  metricItem["DataItems"].push(metricProps)
-                  metricItems.push(metricItem)
-                end
               end
             end
           end
@@ -488,78 +492,74 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName
       return metricItems
     end #getContainerResourceRequestAndLimits
 
-    def getContainerResourceRequestsAndLimitsAsInsightsMetrics(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
+    def getContainerResourceRequestsAndLimitsAsInsightsMetrics(pod, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
       metricItems = []
       begin
         clusterId = getClusterId
         clusterName = getClusterName
-
-        metricInfo = metricJSON
-        metricInfo["items"].each do |pod|
-          podNameSpace = pod["metadata"]["namespace"]
-          if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences")
-            # The above case seems to be the only case where you have horizontal scaling of pods
-            # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash
-            # instead of the actual poduid. Since this uid is not being surface into the UX
-            # its ok to use this.
-            # Use kubernetes.io/config.hash to be able to correlate with cadvisor data
-            if pod["metadata"]["annotations"].nil?
-              next
-            else
-              podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"]
-            end
+        podNameSpace = pod["metadata"]["namespace"]
+        if podNameSpace.eql?("kube-system") && !pod["metadata"].key?("ownerReferences")
+          # The above case seems to be the only case where you have horizontal scaling of pods
+          # but no controller, in which case cAdvisor picks up kubernetes.io/config.hash
+          # instead of the actual poduid. Since this uid is not being surface into the UX
+          # its ok to use this.
+          # Use kubernetes.io/config.hash to be able to correlate with cadvisor data
+          if pod["metadata"]["annotations"].nil?
+            return metricItems
           else
-            podUid = pod["metadata"]["uid"]
+            podUid = pod["metadata"]["annotations"]["kubernetes.io/config.hash"]
           end
+        else
+          podUid = pod["metadata"]["uid"]
+        end
 
-          podContainers = []
-          if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty?
-            podContainers = podContainers + pod["spec"]["containers"]
-          end
-          # Adding init containers to the record list as well.
-          if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty?
-            podContainers = podContainers + pod["spec"]["initContainers"]
-          end
+        podContainers = []
+        if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty?
+          podContainers = podContainers + pod["spec"]["containers"]
+        end
+        # Adding init containers to the record list as well.
+        if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty?
+          podContainers = podContainers + pod["spec"]["initContainers"]
+        end
 
-          if (!podContainers.nil? && !podContainers.empty?)
-            if (!pod["spec"]["nodeName"].nil?)
-              nodeName = pod["spec"]["nodeName"]
+        if (!podContainers.nil? && !podContainers.empty?)
+          if (!pod["spec"]["nodeName"].nil?)
+            nodeName = pod["spec"]["nodeName"]
+          else
+            nodeName = "" #unscheduled pod. We still want to collect limits & requests for GPU
+          end
+          podContainers.each do |container|
+            metricValue = nil
+            containerName = container["name"]
+            #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+            if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?)
+              metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect])
             else
-              nodeName = "" #unscheduled pod. We still want to collect limits & requests for GPU
-            end
-            podContainers.each do |container|
-              metricValue = nil
-              containerName = container["name"]
-              #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
-              if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?)
-                metricValue = getMetricNumericValue(metricNameToCollect, container["resources"][metricCategory][metricNameToCollect])
-              else
-                #No container level limit for the given metric, so default to node level limit for non-gpu metrics
-                if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu")
-                  nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect
-                  metricValue = @@NodeMetrics[nodeMetricsHashKey]
-                end
-              end
-              if (!metricValue.nil?)
-                metricItem = {}
-                metricItem["CollectionTime"] = metricTime
-                metricItem["Computer"] = nodeName
-                metricItem["Name"] = metricNametoReturn
-                metricItem["Value"] = metricValue
-                metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
-                metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE
-
-                metricTags = {}
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName
-                #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace
-
-                metricItem["Tags"] = metricTags
-
-                metricItems.push(metricItem)
+              #No container level limit for the given metric, so default to node level limit for non-gpu metrics
+              if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu")
+                nodeMetricsHashKey = clusterId + "/" + nodeName + "_" + "allocatable" + "_" + metricNameToCollect
+                metricValue = @@NodeMetrics[nodeMetricsHashKey]
               end
             end
+            if (!metricValue.nil?)
+              metricItem = {}
+              metricItem["CollectionTime"] = metricTime
+              metricItem["Computer"] = nodeName
+              metricItem["Name"] = metricNametoReturn
+              metricItem["Value"] = metricValue
+              metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
+              metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE
+
+              metricTags = {}
+              metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId
+              metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName
+              metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName
+              #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace
+
+              metricItem["Tags"] = metricTags
+
+              metricItems.push(metricItem)
+            end
           end
         end
       rescue => error
@@ -578,32 +578,9 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet
         #if we are coming up with the time it should be same for all nodes
         #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
         metricInfo["items"].each do |node|
-          if (!node["status"][metricCategory].nil?)
-
-            # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory"
-            metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect])
-
-            metricItem = {}
-            metricItem["DataItems"] = []
-            metricProps = {}
-            metricProps["Timestamp"] = metricTime
-            metricProps["Host"] = node["metadata"]["name"]
-            # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent
-            metricProps["Computer"] = node["metadata"]["name"]
-            metricProps["ObjectName"] = "K8SNode"
-            metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"]
-            metricProps["Collections"] = []
-            metricCollections = {}
-            metricCollections["CounterName"] = metricNametoReturn
-            metricCollections["Value"] = metricValue
-
-            metricProps["Collections"].push(metricCollections)
-            metricItem["DataItems"].push(metricProps)
+          metricItem = parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime)
+          if !metricItem.nil? && !metricItem.empty?
             metricItems.push(metricItem)
-            #push node level metrics to a inmem hash so that we can use it looking up at container level.
-            #Currently if container level cpu & memory limits are not defined we default to node level limits
-            @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue
-            #@Log.info ("Node metric hash: #{@@NodeMetrics}")
           end
         end
       rescue => error
@@ -612,49 +589,82 @@ def parseNodeLimits(metricJSON, metricCategory, metricNameToCollect, metricNamet
       return metricItems
     end #parseNodeLimits
 
-    def parseNodeLimitsAsInsightsMetrics(metricJSON, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
-      metricItems = []
+    def parseNodeLimitsFromNodeItem(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
+      metricItem = {}
       begin
-        metricInfo = metricJSON
         clusterId = getClusterId
-        clusterName = getClusterName
         #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics,
         #if we are coming up with the time it should be same for all nodes
         #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
-        metricInfo["items"].each do |node|
-          if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?)
-
-            # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" or "amd.com/gpu" or "nvidia.com/gpu"
-            metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect])
-
-            metricItem = {}
-            metricItem["CollectionTime"] = metricTime
-            metricItem["Computer"] = node["metadata"]["name"]
-            metricItem["Name"] = metricNametoReturn
-            metricItem["Value"] = metricValue
-            metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
-            metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE
-
-            metricTags = {}
-            metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId
-            metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName
-            metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_VENDOR] = metricNameToCollect
-
-            metricItem["Tags"] = metricTags
+        if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?)
+          # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory"
+          metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect])
+
+          metricItem["DataItems"] = []
+          metricProps = {}
+          metricProps["Timestamp"] = metricTime
+          metricProps["Host"] = node["metadata"]["name"]
+          # Adding this so that it is not set by base omsagent since it was not set earlier and being set by base omsagent
+          metricProps["Computer"] = node["metadata"]["name"]
+          metricProps["ObjectName"] = "K8SNode"
+          metricProps["InstanceName"] = clusterId + "/" + node["metadata"]["name"]
+          metricProps["Collections"] = []
+          metricCollections = {}
+          metricCollections["CounterName"] = metricNametoReturn
+          metricCollections["Value"] = metricValue
+
+          metricProps["Collections"].push(metricCollections)
+          metricItem["DataItems"].push(metricProps)
+
+          #push node level metrics to a inmem hash so that we can use it looking up at container level.
+          #Currently if container level cpu & memory limits are not defined we default to node level limits
+          @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue
+          #@Log.info ("Node metric hash: #{@@NodeMetrics}")
+        end
+      rescue => error
+        @Log.warn("parseNodeLimitsFromNodeItem failed: #{error} for metric #{metricCategory} #{metricNameToCollect}")
+      end
+      return metricItem
+    end #parseNodeLimitsFromNodeItem
 
-            metricItems.push(metricItem)
-            #push node level metrics (except gpu ones) to a inmem hash so that we can use it looking up at container level.
-            #Currently if container level cpu & memory limits are not defined we default to node level limits
-            if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu")
-              @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue
-              #@Log.info ("Node metric hash: #{@@NodeMetrics}")
-            end
+    def parseNodeLimitsAsInsightsMetrics(node, metricCategory, metricNameToCollect, metricNametoReturn, metricTime = Time.now.utc.iso8601)
+      metricItem = {}
+      begin
+        #Since we are getting all node data at the same time and kubernetes doesnt specify a timestamp for the capacity and allocation metrics,
+        #if we are coming up with the time it should be same for all nodes
+        #metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z
+        if (!node["status"][metricCategory].nil?) && (!node["status"][metricCategory][metricNameToCollect].nil?)
+          clusterId = getClusterId
+          clusterName = getClusterName
+
+          # metricCategory can be "capacity" or "allocatable" and metricNameToCollect can be "cpu" or "memory" or "amd.com/gpu" or "nvidia.com/gpu"
+          metricValue = getMetricNumericValue(metricNameToCollect, node["status"][metricCategory][metricNameToCollect])
+
+          metricItem["CollectionTime"] = metricTime
+          metricItem["Computer"] = node["metadata"]["name"]
+          metricItem["Name"] = metricNametoReturn
+          metricItem["Value"] = metricValue
+          metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
+          metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE
+
+          metricTags = {}
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_VENDOR] = metricNameToCollect
+
+          metricItem["Tags"] = metricTags
+
+          #push node level metrics (except gpu ones) to a inmem hash so that we can use it looking up at container level.
+          #Currently if container level cpu & memory limits are not defined we default to node level limits
+          if (metricNameToCollect.downcase != "nvidia.com/gpu") && (metricNameToCollect.downcase != "amd.com/gpu")
+            @@NodeMetrics[clusterId + "/" + node["metadata"]["name"] + "_" + metricCategory + "_" + metricNameToCollect] = metricValue
+            #@Log.info ("Node metric hash: #{@@NodeMetrics}")
           end
         end
       rescue => error
         @Log.warn("parseNodeLimitsAsInsightsMetrics failed: #{error} for metric #{metricCategory} #{metricNameToCollect}")
       end
-      return metricItems
+      return metricItem
     end
 
     def getMetricNumericValue(metricName, metricVal)
@@ -777,5 +787,37 @@ def getKubeAPIServerUrl
       end
       return apiServerUrl
     end
+
+    # Converts a services list response into an array of service inventory records.
+    # serviceList: hash whose "items" array holds the service objects (nil/empty yields []).
+    # batchTime: value stored as CollectionTime on every record; defaults to the current UTC time in ISO 8601.
+    # Returns the array of record hashes. Errors are logged and sent to telemetry, and the
+    # records built before the failure are returned.
+    def getKubeServicesInventoryRecords(serviceList, batchTime = Time.now.utc.iso8601)
+      kubeServiceRecords = []
+      begin
+        if (!serviceList.nil? && !serviceList.empty?)
+          servicesCount = serviceList["items"].length
+          @Log.info("KubernetesApiClient::getKubeServicesInventoryRecords : number of services in serviceList  #{servicesCount} @ #{Time.now.utc.iso8601}")
+          serviceList["items"].each do |item|
+            kubeServiceRecord = {}
+            kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+            kubeServiceRecord["ServiceName"] = item["metadata"]["name"]
+            kubeServiceRecord["Namespace"] = item["metadata"]["namespace"]
+            kubeServiceRecord["SelectorLabels"] = [item["spec"]["selector"]]
+            # ClusterId and ClusterName are deliberately not set here; they are added just before emit to keep the memory footprint low
+            # kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId
+            # kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName
+            kubeServiceRecord["ClusterIP"] = item["spec"]["clusterIP"]
+            kubeServiceRecord["ServiceType"] = item["spec"]["type"]
+            kubeServiceRecords.push(kubeServiceRecord.dup)
+          end
+        end
+      rescue => errorStr
+        @Log.warn "KubernetesApiClient::getKubeServicesInventoryRecords:Failed with an error : #{errorStr}"
+        ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+      end
+      return kubeServiceRecords
+    end
   end
 end
diff --git a/source/plugins/ruby/in_kube_events.rb b/source/plugins/ruby/in_kube_events.rb
index 6f59a3fc1..4f6017cc5 100644
--- a/source/plugins/ruby/in_kube_events.rb
+++ b/source/plugins/ruby/in_kube_events.rb
@@ -17,8 +17,9 @@ def initialize
       require_relative "omslog"
       require_relative "ApplicationInsightsUtility"
 
-      # 30000 events account to approximately 5MB
-      @EVENTS_CHUNK_SIZE = 30000
+      # refer to tomlparser-agent-config for the default value
+      # this is configurable via configmap
+      @EVENTS_CHUNK_SIZE = 0
 
       # Initializing events count for telemetry
       @eventsCount = 0
@@ -36,6 +37,15 @@ def configure(conf)
 
     def start
       if @run_interval
+        if !ENV["EVENTS_CHUNK_SIZE"].nil? && !ENV["EVENTS_CHUNK_SIZE"].empty? && ENV["EVENTS_CHUNK_SIZE"].to_i > 0
+          @EVENTS_CHUNK_SIZE = ENV["EVENTS_CHUNK_SIZE"].to_i
+        else
+          # this shouldn't happen; setting a default here as a safeguard
+          $log.warn("in_kube_events::start: setting to default value since got EVENTS_CHUNK_SIZE nil or empty")
+          @EVENTS_CHUNK_SIZE = 4000
+        end
+        $log.info("in_kube_events::start : EVENTS_CHUNK_SIZE  @ #{@EVENTS_CHUNK_SIZE}")
+
         @finished = false
         @condition = ConditionVariable.new
         @mutex = Mutex.new
@@ -82,6 +92,8 @@ def enumerate
         end
         $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}")
         if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?)
+          eventsCount = eventList["items"].length
+          $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} @ #{Time.now.utc.iso8601}"
           newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime)
         else
           $log.warn "in_kube_events::enumerate:Received empty eventList"
@@ -91,6 +103,8 @@ def enumerate
         while (!continuationToken.nil? && !continuationToken.empty?)
           continuationToken, eventList = KubernetesApiClient.getResourcesAndContinuationToken("events?fieldSelector=type!=Normal&limit=#{@EVENTS_CHUNK_SIZE}&continue=#{continuationToken}")
           if (!eventList.nil? && !eventList.empty? && eventList.key?("items") && !eventList["items"].nil? && !eventList["items"].empty?)
+            eventsCount = eventList["items"].length
+            $log.info "in_kube_events::enumerate:Received number of events in eventList is #{eventsCount} @ #{Time.now.utc.iso8601}"
             newEventQueryState = parse_and_emit_records(eventList, eventQueryState, newEventQueryState, batchTime)
           else
             $log.warn "in_kube_events::enumerate:Received empty eventList"
diff --git a/source/plugins/ruby/in_kube_nodes.rb b/source/plugins/ruby/in_kube_nodes.rb
index 4d58382f5..e7c5060a5 100644
--- a/source/plugins/ruby/in_kube_nodes.rb
+++ b/source/plugins/ruby/in_kube_nodes.rb
@@ -32,7 +32,12 @@ def initialize
       require_relative "ApplicationInsightsUtility"
       require_relative "oms_common"
       require_relative "omslog"
-      @NODES_CHUNK_SIZE = "400"
+      # refer tomlparser-agent-config for the defaults
+      @NODES_CHUNK_SIZE = 0
+      @NODES_EMIT_STREAM_BATCH_SIZE = 0
+
+      @nodeInventoryE2EProcessingLatencyMs = 0
+      @nodesAPIE2ELatencyMs = 0
       require_relative "constants"
     end
 
@@ -45,11 +50,30 @@ def configure(conf)
 
     def start
       if @run_interval
+        if !ENV["NODES_CHUNK_SIZE"].nil? && !ENV["NODES_CHUNK_SIZE"].empty? && ENV["NODES_CHUNK_SIZE"].to_i > 0
+          @NODES_CHUNK_SIZE = ENV["NODES_CHUNK_SIZE"].to_i
+        else
+          # this shouldn't happen; setting a default here as a safeguard
+          $log.warn("in_kube_nodes::start: setting to default value since got NODES_CHUNK_SIZE nil or empty")
+          @NODES_CHUNK_SIZE = 250
+        end
+        $log.info("in_kube_nodes::start : NODES_CHUNK_SIZE  @ #{@NODES_CHUNK_SIZE}")
+
+        if !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["NODES_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i > 0
+          @NODES_EMIT_STREAM_BATCH_SIZE = ENV["NODES_EMIT_STREAM_BATCH_SIZE"].to_i
+        else
+          # this shouldn't happen; setting a default here as a safeguard
+          $log.warn("in_kube_nodes::start: setting to default value since got NODES_EMIT_STREAM_BATCH_SIZE nil or empty")
+          @NODES_EMIT_STREAM_BATCH_SIZE = 100
+        end
+        $log.info("in_kube_nodes::start : NODES_EMIT_STREAM_BATCH_SIZE  @ #{@NODES_EMIT_STREAM_BATCH_SIZE}")
+
         @finished = false
         @condition = ConditionVariable.new
         @mutex = Mutex.new
         @thread = Thread.new(&method(:run_periodic))
         @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i
+        @@nodeInventoryLatencyTelemetryTimeTracker = DateTime.now.to_time.to_i
       end
     end
 
@@ -69,14 +93,20 @@ def enumerate
         currentTime = Time.now
         batchTime = currentTime.utc.iso8601
 
+        @nodesAPIE2ELatencyMs = 0
+        @nodeInventoryE2EProcessingLatencyMs = 0
+        nodeInventoryStartTime = (Time.now.to_f * 1000).to_i
+        nodesAPIChunkStartTime = (Time.now.to_f * 1000).to_i
         # Initializing continuation token to nil
         continuationToken = nil
         $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}")
         resourceUri = KubernetesApiClient.getNodesResourceUri("nodes?limit=#{@NODES_CHUNK_SIZE}")
         continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri)
-
         $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}")
+        nodesAPIChunkEndTime = (Time.now.to_f * 1000).to_i
+        @nodesAPIE2ELatencyMs = (nodesAPIChunkEndTime - nodesAPIChunkStartTime)
         if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?)
+          $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}")
           parse_and_emit_records(nodeInventory, batchTime)
         else
           $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory"
@@ -84,14 +114,26 @@ def enumerate
 
         #If we receive a continuation token, make calls, process and flush data until we have processed all data
         while (!continuationToken.nil? && !continuationToken.empty?)
+          nodesAPIChunkStartTime = (Time.now.to_f * 1000).to_i
           continuationToken, nodeInventory = KubernetesApiClient.getResourcesAndContinuationToken(resourceUri + "&continue=#{continuationToken}")
+          nodesAPIChunkEndTime = (Time.now.to_f * 1000).to_i
+          @nodesAPIE2ELatencyMs = @nodesAPIE2ELatencyMs + (nodesAPIChunkEndTime - nodesAPIChunkStartTime)
           if (!nodeInventory.nil? && !nodeInventory.empty? && nodeInventory.key?("items") && !nodeInventory["items"].nil? && !nodeInventory["items"].empty?)
+            $log.info("in_kube_nodes::enumerate : number of node items :#{nodeInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}")
             parse_and_emit_records(nodeInventory, batchTime)
           else
             $log.warn "in_kube_nodes::enumerate:Received empty nodeInventory"
           end
         end
 
+        @nodeInventoryE2EProcessingLatencyMs = ((Time.now.to_f * 1000).to_i - nodeInventoryStartTime)
+        timeDifference = (DateTime.now.to_time.to_i - @@nodeInventoryLatencyTelemetryTimeTracker).abs
+        timeDifferenceInMinutes = timeDifference / 60
+        if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
+          ApplicationInsightsUtility.sendMetricTelemetry("NodeInventoryE2EProcessingLatencyMs", @nodeInventoryE2EProcessingLatencyMs, {})
+          ApplicationInsightsUtility.sendMetricTelemetry("NodesAPIE2ELatencyMs", @nodesAPIE2ELatencyMs, {})
+          @@nodeInventoryLatencyTelemetryTimeTracker = DateTime.now.to_time.to_i
+        end
         # Setting this to nil so that we dont hold memory until GC kicks in
         nodeInventory = nil
       rescue => errorStr
@@ -109,77 +151,32 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
         eventStream = MultiEventStream.new
         containerNodeInventoryEventStream = MultiEventStream.new
         insightsMetricsEventStream = MultiEventStream.new
+        kubePerfEventStream = MultiEventStream.new
         @@istestvar = ENV["ISTEST"]
         #get node inventory
-        nodeInventory["items"].each do |items|
-          record = {}
-          # Sending records for ContainerNodeInventory
-          containerNodeInventoryRecord = {}
-          containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-          containerNodeInventoryRecord["Computer"] = items["metadata"]["name"]
-
-          record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-          record["Computer"] = items["metadata"]["name"]
-          record["ClusterName"] = KubernetesApiClient.getClusterName
-          record["ClusterId"] = KubernetesApiClient.getClusterId
-          record["CreationTimeStamp"] = items["metadata"]["creationTimestamp"]
-          record["Labels"] = [items["metadata"]["labels"]]
-          record["Status"] = ""
-
-          if !items["spec"]["providerID"].nil? && !items["spec"]["providerID"].empty?
-            if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack
-              record["KubernetesProviderID"] = "azurestack"
-            else
-              #Multicluster kusto query is filtering after splitting by ":" to the left, so do the same here
-              #https://msazure.visualstudio.com/One/_git/AzureUX-Monitoring?path=%2Fsrc%2FMonitoringExtension%2FClient%2FInfraInsights%2FData%2FQueryTemplates%2FMultiClusterKustoQueryTemplate.ts&_a=contents&version=GBdev
-              provider = items["spec"]["providerID"].split(":")[0]
-              if !provider.nil? && !provider.empty?
-                record["KubernetesProviderID"] = provider
-              else
-                record["KubernetesProviderID"] = items["spec"]["providerID"]
-              end
-            end
-          else
-            record["KubernetesProviderID"] = "onprem"
-          end
-
-          # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions.
-          # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we
-          # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk"
-          # implying that the node is ready for hosting pods, however its out of disk.
-
-          if items["status"].key?("conditions") && !items["status"]["conditions"].empty?
-            allNodeConditions = ""
-            items["status"]["conditions"].each do |condition|
-              if condition["status"] == "True"
-                if !allNodeConditions.empty?
-                  allNodeConditions = allNodeConditions + "," + condition["type"]
-                else
-                  allNodeConditions = condition["type"]
-                end
-              end
-              #collect last transition to/from ready (no matter ready is true/false)
-              if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil?
-                record["LastTransitionTimeReady"] = condition["lastTransitionTime"]
-              end
-            end
-            if !allNodeConditions.empty?
-              record["Status"] = allNodeConditions
+        nodeInventory["items"].each do |item|
+          # node inventory
+          nodeInventoryRecord = getNodeInventoryRecord(item, batchTime)
+          wrapper = {
+            "DataType" => "KUBE_NODE_INVENTORY_BLOB",
+            "IPName" => "ContainerInsights",
+            "DataItems" => [nodeInventoryRecord.each { |k, v| nodeInventoryRecord[k] = v }],
+          }
+          eventStream.add(emitTime, wrapper) if wrapper
+          if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            router.emit_stream(@tag, eventStream) if eventStream
+            $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
+
+            if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+              $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
             end
+            eventStream = MultiEventStream.new
           end
 
-          nodeInfo = items["status"]["nodeInfo"]
-          record["KubeletVersion"] = nodeInfo["kubeletVersion"]
-          record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"]
-          containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"]
-          containerRuntimeVersion = nodeInfo["containerRuntimeVersion"]
-          if containerRuntimeVersion.downcase.start_with?("docker://")
-            containerNodeInventoryRecord["DockerVersion"] = containerRuntimeVersion.split("//")[1]
-          else
-            # using containerRuntimeVersion as DockerVersion as is for non docker runtimes
-            containerNodeInventoryRecord["DockerVersion"] = containerRuntimeVersion
-          end
-          # ContainerNodeInventory data for docker version and operating system.
+          # container node inventory
+          containerNodeInventoryRecord = getContainerNodeInventoryRecord(item, batchTime)
           containerNodeInventoryWrapper = {
             "DataType" => "CONTAINER_NODE_INVENTORY_BLOB",
             "IPName" => "ContainerInsights",
@@ -187,33 +184,81 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
           }
           containerNodeInventoryEventStream.add(emitTime, containerNodeInventoryWrapper) if containerNodeInventoryWrapper
 
-          wrapper = {
-            "DataType" => "KUBE_NODE_INVENTORY_BLOB",
-            "IPName" => "ContainerInsights",
-            "DataItems" => [record.each { |k, v| record[k] = v }],
-          }
-          eventStream.add(emitTime, wrapper) if wrapper
+          if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && containerNodeInventoryEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
+            containerNodeInventoryEventStream = MultiEventStream.new
+          end
+
+          # node metrics records
+          nodeMetricRecords = []
+          nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime)
+          if !nodeMetricRecord.nil? && !nodeMetricRecord.empty?
+            nodeMetricRecords.push(nodeMetricRecord)
+          end
+          nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "allocatable", "memory", "memoryAllocatableBytes", batchTime)
+          if !nodeMetricRecord.nil? && !nodeMetricRecord.empty?
+            nodeMetricRecords.push(nodeMetricRecord)
+          end
+          nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "cpu", "cpuCapacityNanoCores", batchTime)
+          if !nodeMetricRecord.nil? && !nodeMetricRecord.empty?
+            nodeMetricRecords.push(nodeMetricRecord)
+          end
+          nodeMetricRecord = KubernetesApiClient.parseNodeLimitsFromNodeItem(item, "capacity", "memory", "memoryCapacityBytes", batchTime)
+          if !nodeMetricRecord.nil? && !nodeMetricRecord.empty?
+            nodeMetricRecords.push(nodeMetricRecord)
+          end
+          nodeMetricRecords.each do |metricRecord|
+            metricRecord["DataType"] = "LINUX_PERF_BLOB"
+            metricRecord["IPName"] = "LogManagement"
+            kubePerfEventStream.add(emitTime, metricRecord) if metricRecord
+          end
+          if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+            kubePerfEventStream = MultiEventStream.new
+          end
+
+          # node GPU metrics record
+          nodeGPUInsightsMetricsRecords = []
+          insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime)
+          if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty?
+            nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord)
+          end
+          insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime)
+          if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty?
+            nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord)
+          end
+          insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime)
+          if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty?
+            nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord)
+          end
+          insightsMetricsRecord = KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(item, "capacity", "amd.com/gpu", "nodeGpuCapacity", batchTime)
+          if !insightsMetricsRecord.nil? && !insightsMetricsRecord.empty?
+            nodeGPUInsightsMetricsRecords.push(insightsMetricsRecord)
+          end
+          nodeGPUInsightsMetricsRecords.each do |insightsMetricsRecord|
+            wrapper = {
+              "DataType" => "INSIGHTS_METRICS_BLOB",
+              "IPName" => "ContainerInsights",
+              "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
+            }
+            insightsMetricsEventStream.add(emitTime, wrapper) if wrapper
+          end
+          if @NODES_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @NODES_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{@NODES_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+            insightsMetricsEventStream = MultiEventStream.new
+          end
           # Adding telemetry to send node telemetry every 10 minutes
           timeDifference = (DateTime.now.to_time.to_i - @@nodeTelemetryTimeTracker).abs
           timeDifferenceInMinutes = timeDifference / 60
           if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
-            properties = {}
-            properties["Computer"] = record["Computer"]
-            properties["KubeletVersion"] = record["KubeletVersion"]
-            properties["OperatingSystem"] = nodeInfo["operatingSystem"]
-            # DockerVersion field holds docker version if runtime is docker/moby else <runtime>://<version>
-            if containerRuntimeVersion.downcase.start_with?("docker://")
-              properties["DockerVersion"] = containerRuntimeVersion.split("//")[1]
-            else
-              properties["DockerVersion"] = containerRuntimeVersion
-            end
-            properties["KubernetesProviderID"] = record["KubernetesProviderID"]
-            properties["KernelVersion"] = nodeInfo["kernelVersion"]
-            properties["OSImage"] = nodeInfo["osImage"]
+            properties = getNodeTelemetryProps(item)
+            properties["KubernetesProviderID"] = nodeInventoryRecord["KubernetesProviderID"]
+            capacityInfo = item["status"]["capacity"]
 
-            capacityInfo = items["status"]["capacity"]
             ApplicationInsightsUtility.sendMetricTelemetry("NodeMemory", capacityInfo["memory"], properties)
-
             begin
               if (!capacityInfo["nvidia.com/gpu"].nil?) && (!capacityInfo["nvidia.com/gpu"].empty?)
                 properties["nvigpus"] = capacityInfo["nvidia.com/gpu"]
@@ -247,72 +292,32 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
             telemetrySent = true
           end
         end
-        router.emit_stream(@tag, eventStream) if eventStream
-        router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
-        router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
         if telemetrySent == true
           @@nodeTelemetryTimeTracker = DateTime.now.to_time.to_i
         end
-
-        if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
-          $log.info("kubeNodeInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+        if eventStream.count > 0
+          $log.info("in_kube_node::parse_and_emit_records: number of node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@tag, eventStream) if eventStream
+          $log.info("in_kube_node::parse_and_emit_records: number of mdm node inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@@MDMKubeNodeInventoryTag, eventStream) if eventStream
+          eventStream = nil
         end
-        #:optimize:kubeperf merge
-        begin
-          #if(!nodeInventory.empty?)
-          nodeMetricDataItems = []
-          #allocatable metrics @ node level
-          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "cpu", "cpuAllocatableNanoCores", batchTime))
-          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "allocatable", "memory", "memoryAllocatableBytes", batchTime))
-          #capacity metrics @ node level
-          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "cpu", "cpuCapacityNanoCores", batchTime))
-          nodeMetricDataItems.concat(KubernetesApiClient.parseNodeLimits(nodeInventory, "capacity", "memory", "memoryCapacityBytes", batchTime))
-
-          kubePerfEventStream = MultiEventStream.new
-
-          nodeMetricDataItems.each do |record|
-            record["DataType"] = "LINUX_PERF_BLOB"
-            record["IPName"] = "LogManagement"
-            kubePerfEventStream.add(emitTime, record) if record
-          end
-          #end
-          router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
-
-          #start GPU InsightsMetrics items
-          begin
-            nodeGPUInsightsMetricsDataItems = []
-            nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "allocatable", "nvidia.com/gpu", "nodeGpuAllocatable", batchTime))
-            nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "capacity", "nvidia.com/gpu", "nodeGpuCapacity", batchTime))
-
-            nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "allocatable", "amd.com/gpu", "nodeGpuAllocatable", batchTime))
-            nodeGPUInsightsMetricsDataItems.concat(KubernetesApiClient.parseNodeLimitsAsInsightsMetrics(nodeInventory, "capacity", "amd.com/gpu", "nodeGpuCapacity", batchTime))
-
-            nodeGPUInsightsMetricsDataItems.each do |insightsMetricsRecord|
-              wrapper = {
-                "DataType" => "INSIGHTS_METRICS_BLOB",
-                "IPName" => "ContainerInsights",
-                "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
-              }
-              insightsMetricsEventStream.add(emitTime, wrapper) if wrapper
-            end
-
-            router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
-            if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
-              $log.info("kubeNodeInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-            end
-          rescue => errorStr
-            $log.warn "Failed when processing GPU metrics in_kube_nodes : #{errorStr}"
-            $log.debug_backtrace(errorStr.backtrace)
-            ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-          end
-          #end GPU InsightsMetrics items
-        rescue => errorStr
-          $log.warn "Failed in enumerate for KubePerf from in_kube_nodes : #{errorStr}"
-          $log.debug_backtrace(errorStr.backtrace)
-          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+        if containerNodeInventoryEventStream.count > 0
+          $log.info("in_kube_node::parse_and_emit_records: number of container node inventory records emitted #{containerNodeInventoryEventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@@ContainerNodeInventoryTag, containerNodeInventoryEventStream) if containerNodeInventoryEventStream
+          containerNodeInventoryEventStream = nil
         end
-        #:optimize:end kubeperf merge
 
+        if kubePerfEventStream.count > 0
+          $log.info("in_kube_nodes::parse_and_emit_records: number of node perf metric records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+          kubePerfEventStream = nil
+        end
+        if insightsMetricsEventStream.count > 0
+          $log.info("in_kube_nodes::parse_and_emit_records: number of GPU node perf metric records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+          insightsMetricsEventStream = nil
+        end
       rescue => errorStr
         $log.warn "Failed to retrieve node inventory: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
@@ -352,5 +357,112 @@ def run_periodic
       end
       @mutex.unlock
     end
+
+    # Builds the KubeNodeInventory record Hash for a single node item returned by the Kube API nodes list.
+    # item: node object (Hash); reads metadata, spec.providerID, status.conditions and status.nodeInfo.
+    # batchTime: value stored as CollectionTime; defaults to the current UTC time in ISO 8601 format.
+    # Returns the record; on any error a warning is logged and the fields populated so far are returned.
+    # TODO - move this method to KubernetesClient or helper class
+    def getNodeInventoryRecord(item, batchTime = Time.now.utc.iso8601)
+      record = {}
+      begin
+        record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+        record["Computer"] = item["metadata"]["name"]
+        record["ClusterName"] = KubernetesApiClient.getClusterName
+        record["ClusterId"] = KubernetesApiClient.getClusterId
+        record["CreationTimeStamp"] = item["metadata"]["creationTimestamp"]
+        record["Labels"] = [item["metadata"]["labels"]]
+        record["Status"] = ""
+
+        if !item["spec"]["providerID"].nil? && !item["spec"]["providerID"].empty?
+          if File.file?(@@AzStackCloudFileName) # existence of this file indicates agent running on azstack
+            record["KubernetesProviderID"] = "azurestack"
+          else
+            #Multicluster kusto query is filtering after splitting by ":" to the left, so do the same here
+            #https://msazure.visualstudio.com/One/_git/AzureUX-Monitoring?path=%2Fsrc%2FMonitoringExtension%2FClient%2FInfraInsights%2FData%2FQueryTemplates%2FMultiClusterKustoQueryTemplate.ts&_a=contents&version=GBdev
+            provider = item["spec"]["providerID"].split(":")[0]
+            if !provider.nil? && !provider.empty?
+              record["KubernetesProviderID"] = provider
+            else
+              record["KubernetesProviderID"] = item["spec"]["providerID"]
+            end
+          end
+        else
+          record["KubernetesProviderID"] = "onprem"
+        end
+
+        # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions.
+        # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we
+        # populate the KubeNodeInventory Status field with the comma-separated types of all conditions whose status
+        # is "True", e.g. "Ready,OutOfDisk" means the node is ready for hosting pods, however its out of disk.
+        if item["status"].key?("conditions") && !item["status"]["conditions"].empty?
+          allNodeConditions = ""
+          item["status"]["conditions"].each do |condition|
+            if condition["status"] == "True"
+              allNodeConditions = allNodeConditions.empty? ? condition["type"] : allNodeConditions + "," + condition["type"]
+            end
+            #collect last transition to/from ready (no matter ready is true/false)
+            if condition["type"] == "Ready" && !condition["lastTransitionTime"].nil?
+              record["LastTransitionTimeReady"] = condition["lastTransitionTime"]
+            end
+          end
+          if !allNodeConditions.empty?
+            record["Status"] = allNodeConditions
+          end
+        end
+        nodeInfo = item["status"]["nodeInfo"]
+        record["KubeletVersion"] = nodeInfo["kubeletVersion"]
+        record["KubeProxyVersion"] = nodeInfo["kubeProxyVersion"]
+      rescue => errorStr
+        $log.warn "in_kube_nodes::getNodeInventoryRecord:Failed: #{errorStr}"
+      end
+      return record
+    end
+
+    # Builds the ContainerNodeInventory record for one node item (Hash); reads metadata.name and status.nodeInfo.
+    # batchTime: value stored as CollectionTime; defaults to the current UTC time in ISO 8601 format.
+    # Returns the record; on any error a warning is logged and the fields populated so far are returned.
+    # TODO - move this method to KubernetesClient or helper class
+    def getContainerNodeInventoryRecord(item, batchTime = Time.now.utc.iso8601)
+      containerNodeInventoryRecord = {}
+      begin
+        containerNodeInventoryRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+        containerNodeInventoryRecord["Computer"] = item["metadata"]["name"]
+        nodeInfo = item["status"]["nodeInfo"]
+        containerNodeInventoryRecord["OperatingSystem"] = nodeInfo["osImage"]
+        containerRuntimeVersion = nodeInfo["containerRuntimeVersion"]
+        # DockerVersion is the bare version for docker runtimes, otherwise containerRuntimeVersion as is
+        isDockerRuntime = containerRuntimeVersion.downcase.start_with?("docker://")
+        containerNodeInventoryRecord["DockerVersion"] = isDockerRuntime ? containerRuntimeVersion.split("//")[1] : containerRuntimeVersion
+      rescue => errorStr
+        $log.warn "in_kube_nodes::getContainerNodeInventoryRecord:Failed: #{errorStr}"
+      end
+      return containerNodeInventoryRecord
+    end
+
+    # Builds the properties Hash sent with the periodic node telemetry (called from parse_and_emit_records).
+    # Reads item["metadata"]["name"] and item["status"]["nodeInfo"]; on error logs a warning and returns what was collected.
+    # TODO - move this method to KubernetesClient or helper class
+    def getNodeTelemetryProps(item)
+      properties = {}
+      begin
+        properties["Computer"] = item["metadata"]["name"]
+        nodeInfo = item["status"]["nodeInfo"]
+        properties["KubeletVersion"] = nodeInfo["kubeletVersion"]
+        # operatingSystem, not osImage: osImage is already reported as OSImage below
+        properties["OperatingSystem"] = nodeInfo["operatingSystem"]
+        properties["KernelVersion"] = nodeInfo["kernelVersion"]
+        properties["OSImage"] = nodeInfo["osImage"]
+        containerRuntimeVersion = nodeInfo["containerRuntimeVersion"]
+        # DockerVersion is the bare version for docker runtimes, otherwise containerRuntimeVersion as is
+        isDockerRuntime = containerRuntimeVersion.downcase.start_with?("docker://")
+        properties["DockerVersion"] = isDockerRuntime ? containerRuntimeVersion.split("//")[1] : containerRuntimeVersion
+        properties["NODES_CHUNK_SIZE"] = @NODES_CHUNK_SIZE
+        properties["NODES_EMIT_STREAM_BATCH_SIZE"] = @NODES_EMIT_STREAM_BATCH_SIZE
+      rescue => errorStr
+        $log.warn "in_kube_nodes::getNodeTelemetryProps:Failed: #{errorStr}"
+      end
+      return properties
+    end
   end # Kube_Node_Input
 end # module
diff --git a/source/plugins/ruby/in_kube_podinventory.rb b/source/plugins/ruby/in_kube_podinventory.rb
index bba3e920f..0cff2eefe 100644
--- a/source/plugins/ruby/in_kube_podinventory.rb
+++ b/source/plugins/ruby/in_kube_podinventory.rb
@@ -2,7 +2,7 @@
 # frozen_string_literal: true
 
 module Fluent
-  require_relative "podinventory_to_mdm"      
+  require_relative "podinventory_to_mdm"
 
   class Kube_PodInventory_Input < Input
     Plugin.register_input("kubepodinventory", self)
@@ -19,7 +19,7 @@ def initialize
       require "yajl"
       require "set"
       require "time"
-      
+
       require_relative "kubernetes_container_inventory"
       require_relative "KubernetesApiClient"
       require_relative "ApplicationInsightsUtility"
@@ -27,11 +27,18 @@ def initialize
       require_relative "omslog"
       require_relative "constants"
 
-      @PODS_CHUNK_SIZE = "1500"
+      # refer tomlparser-agent-config for updating defaults
+      # this configurable via configmap
+      @PODS_CHUNK_SIZE = 0
+      @PODS_EMIT_STREAM_BATCH_SIZE = 0
+
       @podCount = 0
+      @serviceCount = 0
       @controllerSet = Set.new []
       @winContainerCount = 0
       @controllerData = {}
+      @podInventoryE2EProcessingLatencyMs = 0
+      @podsAPIE2ELatencyMs = 0
     end
 
     config_param :run_interval, :time, :default => 60
@@ -44,6 +51,24 @@ def configure(conf)
 
     def start
       if @run_interval
+        if !ENV["PODS_CHUNK_SIZE"].nil? && !ENV["PODS_CHUNK_SIZE"].empty? && ENV["PODS_CHUNK_SIZE"].to_i > 0
+          @PODS_CHUNK_SIZE = ENV["PODS_CHUNK_SIZE"].to_i
+        else
+          # this shouldnt happen just setting default here as safe guard
+          $log.warn("in_kube_podinventory::start: setting to default value since got PODS_CHUNK_SIZE nil or empty")
+          @PODS_CHUNK_SIZE = 1000
+        end
+        $log.info("in_kube_podinventory::start : PODS_CHUNK_SIZE  @ #{@PODS_CHUNK_SIZE}")
+
+        if !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].nil? && !ENV["PODS_EMIT_STREAM_BATCH_SIZE"].empty? && ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i > 0
+          @PODS_EMIT_STREAM_BATCH_SIZE = ENV["PODS_EMIT_STREAM_BATCH_SIZE"].to_i
+        else
+          # this shouldnt happen just setting default here as safe guard
+          $log.warn("in_kube_podinventory::start: setting to default value since got PODS_EMIT_STREAM_BATCH_SIZE nil or empty")
+          @PODS_EMIT_STREAM_BATCH_SIZE = 200
+        end
+        $log.info("in_kube_podinventory::start : PODS_EMIT_STREAM_BATCH_SIZE  @ #{@PODS_EMIT_STREAM_BATCH_SIZE}")
+
         @finished = false
         @condition = ConditionVariable.new
         @mutex = Mutex.new
@@ -67,12 +92,15 @@ def enumerate(podList = nil)
         podInventory = podList
         telemetryFlush = false
         @podCount = 0
+        @serviceCount = 0
         @controllerSet = Set.new []
         @winContainerCount = 0
         @controllerData = {}
         currentTime = Time.now
         batchTime = currentTime.utc.iso8601
-
+        serviceRecords = []
+        @podInventoryE2EProcessingLatencyMs = 0
+        podInventoryStartTime = (Time.now.to_f * 1000).to_i
         # Get services first so that we dont need to make a call for very chunk
         $log.info("in_kube_podinventory::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}")
         serviceInfo = KubernetesApiClient.getKubeResourceInfo("services")
@@ -84,32 +112,48 @@ def enumerate(podList = nil)
           serviceList = Yajl::Parser.parse(StringIO.new(serviceInfo.body))
           $log.info("in_kube_podinventory::enumerate:End:Parsing services data using yajl @ #{Time.now.utc.iso8601}")
           serviceInfo = nil
+          # service inventory records much smaller and fixed size compared to serviceList
+          serviceRecords = KubernetesApiClient.getKubeServicesInventoryRecords(serviceList, batchTime)
+          # updating for telemetry
+          @serviceCount += serviceRecords.length
+          serviceList = nil
         end
 
+        # to track e2e processing latency
+        @podsAPIE2ELatencyMs = 0
+        podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i
         # Initializing continuation token to nil
         continuationToken = nil
         $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}")
         continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}")
         $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}")
+        podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i
+        @podsAPIE2ELatencyMs = (podsAPIChunkEndTime - podsAPIChunkStartTime)
         if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?)
-          parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime)
+          $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length}  from Kube API @ #{Time.now.utc.iso8601}")
+          parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime)
         else
           $log.warn "in_kube_podinventory::enumerate:Received empty podInventory"
         end
 
         #If we receive a continuation token, make calls, process and flush data until we have processed all data
         while (!continuationToken.nil? && !continuationToken.empty?)
+          podsAPIChunkStartTime = (Time.now.to_f * 1000).to_i
           continuationToken, podInventory = KubernetesApiClient.getResourcesAndContinuationToken("pods?limit=#{@PODS_CHUNK_SIZE}&continue=#{continuationToken}")
+          podsAPIChunkEndTime = (Time.now.to_f * 1000).to_i
+          @podsAPIE2ELatencyMs = @podsAPIE2ELatencyMs + (podsAPIChunkEndTime - podsAPIChunkStartTime)
           if (!podInventory.nil? && !podInventory.empty? && podInventory.key?("items") && !podInventory["items"].nil? && !podInventory["items"].empty?)
-            parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime)
+            $log.info("in_kube_podinventory::enumerate : number of pod items :#{podInventory["items"].length} from Kube API @ #{Time.now.utc.iso8601}")
+            parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime)
           else
             $log.warn "in_kube_podinventory::enumerate:Received empty podInventory"
           end
         end
 
+        @podInventoryE2EProcessingLatencyMs = ((Time.now.to_f * 1000).to_i - podInventoryStartTime)
         # Setting these to nil so that we dont hold memory until GC kicks in
         podInventory = nil
-        serviceList = nil
+        serviceRecords = nil
 
         # Adding telemetry to send pod telemetry every 5 minutes
         timeDifference = (DateTime.now.to_time.to_i - @@podTelemetryTimeTracker).abs
@@ -122,14 +166,19 @@ def enumerate(podList = nil)
         if telemetryFlush == true
           telemetryProperties = {}
           telemetryProperties["Computer"] = @@hostName
+          telemetryProperties["PODS_CHUNK_SIZE"] = @PODS_CHUNK_SIZE
+          telemetryProperties["PODS_EMIT_STREAM_BATCH_SIZE"] = @PODS_EMIT_STREAM_BATCH_SIZE
           ApplicationInsightsUtility.sendCustomEvent("KubePodInventoryHeartBeatEvent", telemetryProperties)
           ApplicationInsightsUtility.sendMetricTelemetry("PodCount", @podCount, {})
+          ApplicationInsightsUtility.sendMetricTelemetry("ServiceCount", @serviceCount, {})
           telemetryProperties["ControllerData"] = @controllerData.to_json
           ApplicationInsightsUtility.sendMetricTelemetry("ControllerCount", @controllerSet.length, telemetryProperties)
           if @winContainerCount > 0
             telemetryProperties["ClusterWideWindowsContainersCount"] = @winContainerCount
             ApplicationInsightsUtility.sendCustomEvent("WindowsContainerInventoryEvent", telemetryProperties)
           end
+          ApplicationInsightsUtility.sendMetricTelemetry("PodInventoryE2EProcessingLatencyMs", @podInventoryE2EProcessingLatencyMs, telemetryProperties)
+          ApplicationInsightsUtility.sendMetricTelemetry("PodsAPIE2ELatencyMs", @podsAPIE2ELatencyMs, telemetryProperties)
           @@podTelemetryTimeTracker = DateTime.now.to_time.to_i
         end
       rescue => errorStr
@@ -137,260 +186,138 @@ def enumerate(podList = nil)
         $log.debug_backtrace(errorStr.backtrace)
         ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
       end
-    end    
+    end
 
-    def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTime = Time.utc.iso8601)
+    def parse_and_emit_records(podInventory, serviceRecords, continuationToken, batchTime = Time.utc.iso8601)
       currentTime = Time.now
       emitTime = currentTime.to_f
       #batchTime = currentTime.utc.iso8601
       eventStream = MultiEventStream.new
+      kubePerfEventStream = MultiEventStream.new
+      insightsMetricsEventStream = MultiEventStream.new
       @@istestvar = ENV["ISTEST"]
 
       begin #begin block start
         # Getting windows nodes from kubeapi
         winNodes = KubernetesApiClient.getWindowsNodesArray
-
-        podInventory["items"].each do |items| #podInventory block start          
-          containerInventoryRecords = []
-          records = []
-          record = {}
-          record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-          record["Name"] = items["metadata"]["name"]
-          podNameSpace = items["metadata"]["namespace"]
-
-          # For ARO v3 cluster, skip the pods scheduled on to master or infra nodes
-          if KubernetesApiClient.isAROV3Cluster && !items["spec"].nil? && !items["spec"]["nodeName"].nil? &&
-             (items["spec"]["nodeName"].downcase.start_with?("infra-") ||
-              items["spec"]["nodeName"].downcase.start_with?("master-"))
-            next
-          end
-
-          podUid = KubernetesApiClient.getPodUid(podNameSpace, items["metadata"])
-          if podUid.nil?
-            next
-          end
-          record["PodUid"] = podUid
-          record["PodLabel"] = [items["metadata"]["labels"]]
-          record["Namespace"] = podNameSpace
-          record["PodCreationTimeStamp"] = items["metadata"]["creationTimestamp"]
-          #for unscheduled (non-started) pods startTime does NOT exist
-          if !items["status"]["startTime"].nil?
-            record["PodStartTime"] = items["status"]["startTime"]
-          else
-            record["PodStartTime"] = ""
-          end
-          #podStatus
-          # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running
-          podReadyCondition = true
-          if !items["status"]["reason"].nil? && items["status"]["reason"] == "NodeLost" && !items["status"]["conditions"].nil?
-            items["status"]["conditions"].each do |condition|
-              if condition["type"] == "Ready" && condition["status"] == "False"
-                podReadyCondition = false
-                break
-              end
+        podInventory["items"].each do |item| #podInventory block start
+          # pod inventory records
+          podInventoryRecords = getPodInventoryRecords(item, serviceRecords, batchTime)
+          podInventoryRecords.each do |record|
+            if !record.nil?
+              wrapper = {
+                          "DataType" => "KUBE_POD_INVENTORY_BLOB",
+                          "IPName" => "ContainerInsights",
+                          "DataItems" => [record.each { |k, v| record[k] = v }],
+                        }
+              eventStream.add(emitTime, wrapper) if wrapper
+              @inventoryToMdmConvertor.process_pod_inventory_record(wrapper)
             end
           end
-
-          if podReadyCondition == false
-            record["PodStatus"] = "Unknown"
-          # ICM - https://portal.microsofticm.com/imp/v3/incidents/details/187091803/home
-          elsif !items["metadata"]["deletionTimestamp"].nil? && !items["metadata"]["deletionTimestamp"].empty?
-            record["PodStatus"] = Constants::POD_STATUS_TERMINATING
-          else
-            record["PodStatus"] = items["status"]["phase"]
-          end
-          #for unscheduled (non-started) pods podIP does NOT exist
-          if !items["status"]["podIP"].nil?
-            record["PodIp"] = items["status"]["podIP"]
-          else
-            record["PodIp"] = ""
-          end
-          #for unscheduled (non-started) pods nodeName does NOT exist
-          if !items["spec"]["nodeName"].nil?
-            record["Computer"] = items["spec"]["nodeName"]
-          else
-            record["Computer"] = ""
-          end
-
           # Setting this flag to true so that we can send ContainerInventory records for containers
           # on windows nodes and parse environment variables for these containers
           if winNodes.length > 0
-            if (!record["Computer"].empty? && (winNodes.include? record["Computer"]))
+            nodeName = ""
+            if !item["spec"]["nodeName"].nil?
+              nodeName = item["spec"]["nodeName"]
+            end
+            if (!nodeName.empty? && (winNodes.include? nodeName))
               clusterCollectEnvironmentVar = ENV["AZMON_CLUSTER_COLLECT_ENV_VAR"]
               #Generate ContainerInventory records for windows nodes so that we can get image and image tag in property panel
-              containerInventoryRecordsInPodItem = KubernetesContainerInventory.getContainerInventoryRecords(items, batchTime, clusterCollectEnvironmentVar, true)  
-              containerInventoryRecordsInPodItem.each do |containerRecord|
-                containerInventoryRecords.push(containerRecord)          
-              end              
+              containerInventoryRecords = KubernetesContainerInventory.getContainerInventoryRecords(item, batchTime, clusterCollectEnvironmentVar, true)
+              # Send container inventory records for containers on windows nodes
+              @winContainerCount += containerInventoryRecords.length
+              containerInventoryRecords.each do |cirecord|
+                if !cirecord.nil?
+                  ciwrapper = {
+                    "DataType" => "CONTAINER_INVENTORY_BLOB",
+                    "IPName" => "ContainerInsights",
+                    "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }],
+                  }
+                  eventStream.add(emitTime, ciwrapper) if ciwrapper
+                end
+              end
             end
           end
 
-          record["ClusterId"] = KubernetesApiClient.getClusterId
-          record["ClusterName"] = KubernetesApiClient.getClusterName
-          record["ServiceName"] = getServiceNameFromLabels(items["metadata"]["namespace"], items["metadata"]["labels"], serviceList)
-
-          if !items["metadata"]["ownerReferences"].nil?
-            record["ControllerKind"] = items["metadata"]["ownerReferences"][0]["kind"]
-            record["ControllerName"] = items["metadata"]["ownerReferences"][0]["name"]
-            @controllerSet.add(record["ControllerKind"] + record["ControllerName"])
-            #Adding controller kind to telemetry ro information about customer workload
-            if (@controllerData[record["ControllerKind"]].nil?)
-              @controllerData[record["ControllerKind"]] = 1
-            else
-              controllerValue = @controllerData[record["ControllerKind"]]
-              @controllerData[record["ControllerKind"]] += 1
+          if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && eventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+              $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
             end
+            router.emit_stream(@tag, eventStream) if eventStream
+            eventStream = MultiEventStream.new
           end
-          podRestartCount = 0
-          record["PodRestartCount"] = 0
 
-          #Invoke the helper method to compute ready/not ready mdm metric
-          @inventoryToMdmConvertor.process_record_for_pods_ready_metric(record["ControllerName"], record["Namespace"], items["status"]["conditions"])
+          #container perf records
+          containerMetricDataItems = []
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "requests", "cpu", "cpuRequestNanoCores", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "requests", "memory", "memoryRequestBytes", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "cpu", "cpuLimitNanoCores", batchTime))
+          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(item, "limits", "memory", "memoryLimitBytes", batchTime))
 
-          podContainers = []
-          if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty?
-            podContainers = podContainers + items["status"]["containerStatuses"]
-          end
-          # Adding init containers to the record list as well.
-          if items["status"].key?("initContainerStatuses") && !items["status"]["initContainerStatuses"].empty?
-            podContainers = podContainers + items["status"]["initContainerStatuses"]
+          containerMetricDataItems.each do |record|
+            record["DataType"] = "LINUX_PERF_BLOB"
+            record["IPName"] = "LogManagement"
+            kubePerfEventStream.add(emitTime, record) if record
           end
 
-          # if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start
-          if !podContainers.empty? #container status block start
-            podContainers.each do |container|
-              containerRestartCount = 0
-              lastFinishedTime = nil
-              # Need this flag to determine if we need to process container data for mdm metrics like oomkilled and container restart
-              #container Id is of the form
-              #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527
-              if !container["containerID"].nil?
-                record["ContainerID"] = container["containerID"].split("//")[1]
-              else
-                # for containers that have image issues (like invalid image/tag etc..) this will be empty. do not make it all 0
-                record["ContainerID"] = ""
-              end
-              #keeping this as <PodUid/container_name> which is same as InstanceName in perf table
-              if podUid.nil? || container["name"].nil?
-                next
-              else
-                record["ContainerName"] = podUid + "/" + container["name"]
-              end
-              #Pod restart count is a sumtotal of restart counts of individual containers
-              #within the pod. The restart count of a container is maintained by kubernetes
-              #itself in the form of a container label.
-              containerRestartCount = container["restartCount"]
-              record["ContainerRestartCount"] = containerRestartCount
-
-              containerStatus = container["state"]
-              record["ContainerStatusReason"] = ""
-              # state is of the following form , so just picking up the first key name
-              # "state": {
-              #   "waiting": {
-              #     "reason": "CrashLoopBackOff",
-              #      "message": "Back-off 5m0s restarting failed container=metrics-server pod=metrics-server-2011498749-3g453_kube-system(5953be5f-fcae-11e7-a356-000d3ae0e432)"
-              #   }
-              # },
-              # the below is for accounting 'NodeLost' scenario, where-in the containers in the lost node/pod(s) is still being reported as running
-              if podReadyCondition == false
-                record["ContainerStatus"] = "Unknown"
-              else
-                record["ContainerStatus"] = containerStatus.keys[0]
-              end
-              #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric
-              #Picking up both container and node start time from cAdvisor to be consistent
-              if containerStatus.keys[0] == "running"
-                record["ContainerCreationTimeStamp"] = container["state"]["running"]["startedAt"]
-              else
-                if !containerStatus[containerStatus.keys[0]]["reason"].nil? && !containerStatus[containerStatus.keys[0]]["reason"].empty?
-                  record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"]
-                end
-                # Process the record to see if job was completed 6 hours ago. If so, send metric to mdm
-                if !record["ControllerKind"].nil? && record["ControllerKind"].downcase == Constants::CONTROLLER_KIND_JOB
-                  @inventoryToMdmConvertor.process_record_for_terminated_job_metric(record["ControllerName"], record["Namespace"], containerStatus)
-                end
-              end
-
-              # Record the last state of the container. This may have information on why a container was killed.
-              begin
-                if !container["lastState"].nil? && container["lastState"].keys.length == 1
-                  lastStateName = container["lastState"].keys[0]
-                  lastStateObject = container["lastState"][lastStateName]
-                  if !lastStateObject.is_a?(Hash)
-                    raise "expected a hash object. This could signify a bug or a kubernetes API change"
-                  end
-
-                  if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt")
-                    newRecord = Hash.new
-                    newRecord["lastState"] = lastStateName  # get the name of the last state (ex: terminated)
-                    lastStateReason = lastStateObject["reason"]
-                    # newRecord["reason"] = lastStateObject["reason"]  # (ex: OOMKilled)
-                    newRecord["reason"] = lastStateReason  # (ex: OOMKilled)
-                    newRecord["startedAt"] = lastStateObject["startedAt"]  # (ex: 2019-07-02T14:58:51Z)
-                    lastFinishedTime = lastStateObject["finishedAt"]
-                    newRecord["finishedAt"] = lastFinishedTime  # (ex: 2019-07-02T14:58:52Z)
-
-                    # only write to the output field if everything previously ran without error
-                    record["ContainerLastStatus"] = newRecord
-
-                    #Populate mdm metric for OOMKilled container count if lastStateReason is OOMKilled
-                    if lastStateReason.downcase == Constants::REASON_OOM_KILLED
-                      @inventoryToMdmConvertor.process_record_for_oom_killed_metric(record["ControllerName"], record["Namespace"], lastFinishedTime)
-                    end
-                    lastStateReason = nil
-                  else
-                    record["ContainerLastStatus"] = Hash.new
-                  end
-                else
-                  record["ContainerLastStatus"] = Hash.new
-                end
-
-                #Populate mdm metric for container restart count if greater than 0
-                if (!containerRestartCount.nil? && (containerRestartCount.is_a? Integer) && containerRestartCount > 0)
-                  @inventoryToMdmConvertor.process_record_for_container_restarts_metric(record["ControllerName"], record["Namespace"], lastFinishedTime)
-                end
-              rescue => errorStr
-                $log.warn "Failed in parse_and_emit_record pod inventory while processing ContainerLastStatus: #{errorStr}"
-                $log.debug_backtrace(errorStr.backtrace)
-                ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-                record["ContainerLastStatus"] = Hash.new
-              end
+          if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubePerfEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_podinventory::parse_and_emit_records: number of container perf records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+            kubePerfEventStream = MultiEventStream.new
+          end
 
-              podRestartCount += containerRestartCount
-              records.push(record.dup)            
-            end
-          else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod
-            records.push(record)
-          end  #container status block end
-          records.each do |record|
-            if !record.nil?
-              record["PodRestartCount"] = podRestartCount
-              wrapper = {
-                          "DataType" => "KUBE_POD_INVENTORY_BLOB",
-                          "IPName" => "ContainerInsights",
-                          "DataItems" => [record.each { |k, v| record[k] = v }],
-                        }
-              eventStream.add(emitTime, wrapper) if wrapper
-              @inventoryToMdmConvertor.process_pod_inventory_record(wrapper)
-            end
+          # container GPU records
+          containerGPUInsightsMetricsDataItems = []
+          containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "nvidia.com/gpu", "containerGpuRequests", batchTime))
+          containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime))
+          containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "requests", "amd.com/gpu", "containerGpuRequests", batchTime))
+          containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(item, "limits", "amd.com/gpu", "containerGpuLimits", batchTime))
+          containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord|
+            wrapper = {
+              "DataType" => "INSIGHTS_METRICS_BLOB",
+              "IPName" => "ContainerInsights",
+              "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
+            }
+            insightsMetricsEventStream.add(emitTime, wrapper) if wrapper
           end
-          # Send container inventory records for containers on windows nodes
-          @winContainerCount += containerInventoryRecords.length
-          containerInventoryRecords.each do |cirecord|
-            if !cirecord.nil?
-              ciwrapper = {
-                "DataType" => "CONTAINER_INVENTORY_BLOB",
-                "IPName" => "ContainerInsights",
-                "DataItems" => [cirecord.each { |k, v| cirecord[k] = v }],
-              }
-              eventStream.add(emitTime, ciwrapper) if ciwrapper
+
+          if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && insightsMetricsEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
+            $log.info("in_kube_podinventory::parse_and_emit_records: number of GPU insights metrics records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+            if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+              $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
             end
+            router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+            insightsMetricsEventStream = MultiEventStream.new
           end
         end  #podInventory block end
 
-        router.emit_stream(@tag, eventStream) if eventStream
+        if eventStream.count > 0
+          $log.info("in_kube_podinventory::parse_and_emit_records: number of pod inventory records emitted #{eventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@tag, eventStream) if eventStream
+          if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+            $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+          end
+          eventStream = nil
+        end
+
+        if kubePerfEventStream.count > 0
+          $log.info("in_kube_podinventory::parse_and_emit_records: number of perf records emitted #{kubePerfEventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
+          kubePerfEventStream = nil
+        end
+
+        if insightsMetricsEventStream.count > 0
+          $log.info("in_kube_podinventory::parse_and_emit_records: number of insights metrics records emitted #{insightsMetricsEventStream.count} @ #{Time.now.utc.iso8601}")
+          router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+          if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0)
+            $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+          end
+          insightsMetricsEventStream = nil
+        end
 
-        if continuationToken.nil? #no more chunks in this batch to be sent, get all pod inventory records to send
+        if continuationToken.nil? #no more chunks in this batch to be sent, get all mdm pod inventory records to send
           @log.info "Sending pod inventory mdm records to out_mdm"
           pod_inventory_mdm_records = @inventoryToMdmConvertor.get_pod_inventory_mdm_records(batchTime)
           @log.info "pod_inventory_mdm_records.size #{pod_inventory_mdm_records.size}"
@@ -401,101 +328,36 @@ def parse_and_emit_records(podInventory, serviceList, continuationToken, batchTi
           router.emit_stream(@@MDMKubePodInventoryTag, mdm_pod_inventory_es) if mdm_pod_inventory_es
         end
 
-        #:optimize:kubeperf merge
-        begin
-          #if(!podInventory.empty?)
-          containerMetricDataItems = []
-          #hostName = (OMS::Common.get_hostname)
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "cpu", "cpuRequestNanoCores", batchTime))
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "requests", "memory", "memoryRequestBytes", batchTime))
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "cpu", "cpuLimitNanoCores", batchTime))
-          containerMetricDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimits(podInventory, "limits", "memory", "memoryLimitBytes", batchTime))
-
-          kubePerfEventStream = MultiEventStream.new
-          insightsMetricsEventStream = MultiEventStream.new
-
-          containerMetricDataItems.each do |record|
-            record["DataType"] = "LINUX_PERF_BLOB"
-            record["IPName"] = "LogManagement"
-            kubePerfEventStream.add(emitTime, record) if record
-          end
-          #end
-          router.emit_stream(@@kubeperfTag, kubePerfEventStream) if kubePerfEventStream
-
-          begin
-            #start GPU InsightsMetrics items
-
-            containerGPUInsightsMetricsDataItems = []
-            containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "requests", "nvidia.com/gpu", "containerGpuRequests", batchTime))
-            containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "limits", "nvidia.com/gpu", "containerGpuLimits", batchTime))
-
-            containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "requests", "amd.com/gpu", "containerGpuRequests", batchTime))
-            containerGPUInsightsMetricsDataItems.concat(KubernetesApiClient.getContainerResourceRequestsAndLimitsAsInsightsMetrics(podInventory, "limits", "amd.com/gpu", "containerGpuLimits", batchTime))
-
-            containerGPUInsightsMetricsDataItems.each do |insightsMetricsRecord|
-              wrapper = {
-                "DataType" => "INSIGHTS_METRICS_BLOB",
-                "IPName" => "ContainerInsights",
-                "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
-              }
-              insightsMetricsEventStream.add(emitTime, wrapper) if wrapper
-
-              if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
-                $log.info("kubePodInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-              end
-            end
-
-            router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
-            #end GPU InsightsMetrics items
-          rescue => errorStr
-            $log.warn "Failed when processing GPU metrics in_kube_podinventory : #{errorStr}"
-            $log.debug_backtrace(errorStr.backtrace)
-            ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-          end
-        rescue => errorStr
-          $log.warn "Failed in parse_and_emit_record for KubePerf from in_kube_podinventory : #{errorStr}"
-          $log.debug_backtrace(errorStr.backtrace)
-          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
-        end
-        #:optimize:end kubeperf merge
-
-        #:optimize:start kubeservices merge
-        begin
-          if (!serviceList.nil? && !serviceList.empty?)
-            kubeServicesEventStream = MultiEventStream.new
-            serviceList["items"].each do |items|
-              kubeServiceRecord = {}
-              kubeServiceRecord["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
-              kubeServiceRecord["ServiceName"] = items["metadata"]["name"]
-              kubeServiceRecord["Namespace"] = items["metadata"]["namespace"]
-              kubeServiceRecord["SelectorLabels"] = [items["spec"]["selector"]]
+        if continuationToken.nil? # sending kube services inventory records
+          kubeServicesEventStream = MultiEventStream.new
+          serviceRecords.each do |kubeServiceRecord|
+            if !kubeServiceRecord.nil?
+              # adding before emit to reduce memory foot print
               kubeServiceRecord["ClusterId"] = KubernetesApiClient.getClusterId
               kubeServiceRecord["ClusterName"] = KubernetesApiClient.getClusterName
-              kubeServiceRecord["ClusterIP"] = items["spec"]["clusterIP"]
-              kubeServiceRecord["ServiceType"] = items["spec"]["type"]
-              #<TODO> : Add ports and status fields
               kubeServicewrapper = {
                 "DataType" => "KUBE_SERVICES_BLOB",
                 "IPName" => "ContainerInsights",
                 "DataItems" => [kubeServiceRecord.each { |k, v| kubeServiceRecord[k] = v }],
               }
               kubeServicesEventStream.add(emitTime, kubeServicewrapper) if kubeServicewrapper
+              if @PODS_EMIT_STREAM_BATCH_SIZE > 0 && kubeServicesEventStream.count >= @PODS_EMIT_STREAM_BATCH_SIZE
+                $log.info("in_kube_podinventory::parse_and_emit_records: number of service records emitted #{@PODS_EMIT_STREAM_BATCH_SIZE} @ #{Time.now.utc.iso8601}")
+                router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
+                kubeServicesEventStream = MultiEventStream.new
+              end
             end
+          end
+
+          if kubeServicesEventStream.count > 0
+            $log.info("in_kube_podinventory::parse_and_emit_records : number of service records emitted #{kubeServicesEventStream.count} @ #{Time.now.utc.iso8601}")
             router.emit_stream(@@kubeservicesTag, kubeServicesEventStream) if kubeServicesEventStream
           end
-        rescue => errorStr
-          $log.warn "Failed in parse_and_emit_record for KubeServices from in_kube_podinventory : #{errorStr}"
-          $log.debug_backtrace(errorStr.backtrace)
-          ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+          kubeServicesEventStream = nil
         end
-        #:optimize:end kubeservices merge
 
         #Updating value for AppInsights telemetry
         @podCount += podInventory["items"].length
-
-        if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
-          $log.info("kubePodInventoryEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-        end
       rescue => errorStr
         $log.warn "Failed in parse_and_emit_record pod inventory: #{errorStr}"
         $log.debug_backtrace(errorStr.backtrace)
@@ -535,25 +397,238 @@ def run_periodic
       @mutex.unlock
     end
 
-    def getServiceNameFromLabels(namespace, labels, serviceList)
+    # Builds the inventory records for one pod item: one record per (init) container status, or a single pod-level record when there are none. TODO - move this method to KubernetesClient or helper class
+    def getPodInventoryRecords(item, serviceRecords, batchTime = Time.now.utc.iso8601)
+      records = [] # returned on every exit path; stays empty when the pod is skipped
+      record = {} # pod-level fields, duplicated once per container further down
+
+      begin
+        record["CollectionTime"] = batchTime #This is the time that is mapped to become TimeGenerated
+        record["Name"] = item["metadata"]["name"]
+        podNameSpace = item["metadata"]["namespace"]
+        podUid = KubernetesApiClient.getPodUid(podNameSpace, item["metadata"])
+        if podUid.nil?
+          return records
+        end
+
+        nodeName = ""
+        #for unscheduled (non-started) pods nodeName does NOT exist
+        if !item["spec"]["nodeName"].nil?
+          nodeName = item["spec"]["nodeName"]
+        end
+        # For ARO v3 cluster, skip the pods scheduled on to master or infra nodes
+        if KubernetesApiClient.isAROv3MasterOrInfraPod(nodeName)
+          return records
+        end
+
+        record["PodUid"] = podUid
+        record["PodLabel"] = [item["metadata"]["labels"]]
+        record["Namespace"] = podNameSpace
+        record["PodCreationTimeStamp"] = item["metadata"]["creationTimestamp"]
+        #for unscheduled (non-started) pods startTime does NOT exist
+        if !item["status"]["startTime"].nil?
+          record["PodStartTime"] = item["status"]["startTime"]
+        else
+          record["PodStartTime"] = ""
+        end
+        #podStatus
+        # the below is for accounting 'NodeLost' scenario, where-in the pod(s) in the lost node is still being reported as running
+        podReadyCondition = true
+        if !item["status"]["reason"].nil? && item["status"]["reason"] == "NodeLost" && !item["status"]["conditions"].nil?
+          item["status"]["conditions"].each do |condition|
+            if condition["type"] == "Ready" && condition["status"] == "False"
+              podReadyCondition = false
+              break
+            end
+          end
+        end
+        if podReadyCondition == false
+          record["PodStatus"] = "Unknown"
+          # ICM - https://portal.microsofticm.com/imp/v3/incidents/details/187091803/home
+        elsif !item["metadata"]["deletionTimestamp"].nil? && !item["metadata"]["deletionTimestamp"].empty?
+          record["PodStatus"] = Constants::POD_STATUS_TERMINATING
+        else
+          record["PodStatus"] = item["status"]["phase"]
+        end
+        #for unscheduled (non-started) pods podIP does NOT exist
+        if !item["status"]["podIP"].nil?
+          record["PodIp"] = item["status"]["podIP"]
+        else
+          record["PodIp"] = ""
+        end
+
+        record["Computer"] = nodeName
+        record["ClusterId"] = KubernetesApiClient.getClusterId
+        record["ClusterName"] = KubernetesApiClient.getClusterName
+        record["ServiceName"] = getServiceNameFromLabels(item["metadata"]["namespace"], item["metadata"]["labels"], serviceRecords)
+
+        if !item["metadata"]["ownerReferences"].nil?
+          record["ControllerKind"] = item["metadata"]["ownerReferences"][0]["kind"]
+          record["ControllerName"] = item["metadata"]["ownerReferences"][0]["name"]
+          @controllerSet.add(record["ControllerKind"] + record["ControllerName"])
+          #Adding controller kind to telemetry to get information about customer workload
+          if (@controllerData[record["ControllerKind"]].nil?)
+            @controllerData[record["ControllerKind"]] = 1
+          else
+            # kind already counted at least once, so bump its counter
+            @controllerData[record["ControllerKind"]] += 1
+          end
+        end
+        podRestartCount = 0
+        record["PodRestartCount"] = 0
+
+        #Invoke the helper method to compute ready/not ready mdm metric
+        @inventoryToMdmConvertor.process_record_for_pods_ready_metric(record["ControllerName"], record["Namespace"], item["status"]["conditions"])
+
+        podContainers = []
+        if item["status"].key?("containerStatuses") && !item["status"]["containerStatuses"].empty?
+          podContainers = podContainers + item["status"]["containerStatuses"]
+        end
+        # Adding init containers to the record list as well.
+        if item["status"].key?("initContainerStatuses") && !item["status"]["initContainerStatuses"].empty?
+          podContainers = podContainers + item["status"]["initContainerStatuses"]
+        end
+        # one record is emitted per entry in podContainers (regular container statuses first, then init container statuses)
+        if !podContainers.empty? #container status block start
+          podContainers.each do |container|
+            containerRestartCount = 0
+            lastFinishedTime = nil
+            # Need this flag to determine if we need to process container data for mdm metrics like oomkilled and container restart
+            #container Id is of the form
+            #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527
+            if !container["containerID"].nil?
+              record["ContainerID"] = container["containerID"].split("//")[1]
+            else
+              # for containers that have image issues (like invalid image/tag etc..) this will be empty. do not make it all 0
+              record["ContainerID"] = ""
+            end
+            #keeping this as <PodUid/container_name> which is same as InstanceName in perf table
+            if podUid.nil? || container["name"].nil?
+              next
+            else
+              record["ContainerName"] = podUid + "/" + container["name"]
+            end
+            #Pod restart count is a sumtotal of restart counts of individual containers
+            #within the pod. The restart count of a container is maintained by kubernetes
+            #itself in the form of a container label.
+            containerRestartCount = container["restartCount"]
+            record["ContainerRestartCount"] = containerRestartCount
+
+            containerStatus = container["state"]
+            record["ContainerStatusReason"] = ""
+            # state is of the following form , so just picking up the first key name
+            # "state": {
+            #   "waiting": {
+            #     "reason": "CrashLoopBackOff",
+            #      "message": "Back-off 5m0s restarting failed container=metrics-server pod=metrics-server-2011498749-3g453_kube-system(5953be5f-fcae-11e7-a356-000d3ae0e432)"
+            #   }
+            # },
+            # the below is for accounting 'NodeLost' scenario, where-in the containers in the lost node/pod(s) is still being reported as running
+            if podReadyCondition == false
+              record["ContainerStatus"] = "Unknown"
+            else
+              record["ContainerStatus"] = containerStatus.keys[0]
+            end
+            #TODO : Remove ContainerCreationTimeStamp from here since we are sending it as a metric
+            #Picking up both container and node start time from cAdvisor to be consistent
+            if containerStatus.keys[0] == "running"
+              record["ContainerCreationTimeStamp"] = container["state"]["running"]["startedAt"]
+            else
+              if !containerStatus[containerStatus.keys[0]]["reason"].nil? && !containerStatus[containerStatus.keys[0]]["reason"].empty?
+                record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"]
+              end
+              # Process the record to see if job was completed 6 hours ago. If so, send metric to mdm
+              if !record["ControllerKind"].nil? && record["ControllerKind"].downcase == Constants::CONTROLLER_KIND_JOB
+                @inventoryToMdmConvertor.process_record_for_terminated_job_metric(record["ControllerName"], record["Namespace"], containerStatus)
+              end
+            end
+
+            # Record the last state of the container. This may have information on why a container was killed.
+            begin
+              if !container["lastState"].nil? && container["lastState"].keys.length == 1
+                lastStateName = container["lastState"].keys[0]
+                lastStateObject = container["lastState"][lastStateName]
+                if !lastStateObject.is_a?(Hash)
+                  raise "expected a hash object. This could signify a bug or a kubernetes API change"
+                end
+
+                if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt")
+                  newRecord = Hash.new
+                  newRecord["lastState"] = lastStateName  # get the name of the last state (ex: terminated)
+                  lastStateReason = lastStateObject["reason"]
+                  # newRecord["reason"] = lastStateObject["reason"]  # (ex: OOMKilled)
+                  newRecord["reason"] = lastStateReason  # (ex: OOMKilled)
+                  newRecord["startedAt"] = lastStateObject["startedAt"]  # (ex: 2019-07-02T14:58:51Z)
+                  lastFinishedTime = lastStateObject["finishedAt"]
+                  newRecord["finishedAt"] = lastFinishedTime  # (ex: 2019-07-02T14:58:52Z)
+
+                  # only write to the output field if everything previously ran without error
+                  record["ContainerLastStatus"] = newRecord
+
+                  #Populate mdm metric for OOMKilled container count if lastStateReason is OOMKilled
+                  if lastStateReason.downcase == Constants::REASON_OOM_KILLED
+                    @inventoryToMdmConvertor.process_record_for_oom_killed_metric(record["ControllerName"], record["Namespace"], lastFinishedTime)
+                  end
+                  lastStateReason = nil
+                else
+                  record["ContainerLastStatus"] = Hash.new
+                end
+              else
+                record["ContainerLastStatus"] = Hash.new
+              end
+
+              #Populate mdm metric for container restart count if greater than 0
+              if (!containerRestartCount.nil? && (containerRestartCount.is_a? Integer) && containerRestartCount > 0)
+                @inventoryToMdmConvertor.process_record_for_container_restarts_metric(record["ControllerName"], record["Namespace"], lastFinishedTime)
+              end
+            rescue => errorStr
+              $log.warn "Failed in parse_and_emit_record pod inventory while processing ContainerLastStatus: #{errorStr}"
+              $log.debug_backtrace(errorStr.backtrace)
+              ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
+              record["ContainerLastStatus"] = Hash.new
+            end
+
+            podRestartCount += containerRestartCount if containerRestartCount.is_a?(Integer) # a missing restartCount must not abort the whole pod
+            records.push(record.dup)
+          end
+        else # for unscheduled pods there are no status.containerStatuses, in this case we still want the pod
+          records.push(record)
+        end  #container status block end
+
+        records.each do |record|
+          if !record.nil?
+            record["PodRestartCount"] = podRestartCount
+          end
+        end
+      rescue => error
+        $log.warn("getPodInventoryRecords failed: #{error}")
+      end
+      return records
+    end
+
+    # TODO - move this method to KubernetesClient or helper class
+    def getServiceNameFromLabels(namespace, labels, serviceRecords)
       serviceName = ""
       begin
         if !labels.nil? && !labels.empty?
-          if (!serviceList.nil? && !serviceList.empty? && serviceList.key?("items") && !serviceList["items"].empty?)
-            serviceList["items"].each do |item|
-              found = 0
-              if !item["spec"].nil? && !item["spec"]["selector"].nil? && item["metadata"]["namespace"] == namespace
-                selectorLabels = item["spec"]["selector"]
-                if !selectorLabels.empty?
-                  selectorLabels.each do |key, value|
-                    if !(labels.select { |k, v| k == key && v == value }.length > 0)
-                      break
-                    end
-                    found = found + 1
+          serviceRecords.each do |kubeServiceRecord|
+            found = 0
+            if kubeServiceRecord["Namespace"] == namespace
+              selectorLabels = {}
+              # selector labels wrapped in array in kube service records so unwrapping here
+              if !kubeServiceRecord["SelectorLabels"].nil? && kubeServiceRecord["SelectorLabels"].length > 0
+                selectorLabels = kubeServiceRecord["SelectorLabels"][0]
+              end
+              if !selectorLabels.nil? && !selectorLabels.empty?
+                selectorLabels.each do |key, value|
+                  if !(labels.select { |k, v| k == key && v == value }.length > 0)
+                    break
                   end
+                  found = found + 1
                 end
+                # service can have no selectors
                 if found == selectorLabels.length
-                  return item["metadata"]["name"]
+                  return kubeServiceRecord["ServiceName"]
                 end
               end
             end
diff --git a/source/plugins/ruby/in_kubestate_deployments.rb b/source/plugins/ruby/in_kubestate_deployments.rb
index bcf397150..27e4709a2 100644
--- a/source/plugins/ruby/in_kubestate_deployments.rb
+++ b/source/plugins/ruby/in_kubestate_deployments.rb
@@ -2,230 +2,238 @@
 # frozen_string_literal: true
 
 module Fluent
-    class Kube_Kubestate_Deployments_Input < Input
-      Plugin.register_input("kubestatedeployments", self)
-      @@istestvar = ENV["ISTEST"]
-      # telemetry - To keep telemetry cost reasonable, we keep track of the max deployments over a period of 15m
-      @@deploymentsCount = 0
-      
-      
-  
-      def initialize
-        super
-        require "yajl/json_gem"
-        require "yajl"
-        require "date"
-        require "time"
-  
-        require_relative "KubernetesApiClient"
-        require_relative "oms_common"
-        require_relative "omslog"
-        require_relative "ApplicationInsightsUtility"
-        require_relative "constants"
-  
-        # roughly each deployment is 8k
-        # 1000 deployments account to approximately 8MB
-        @DEPLOYMENTS_CHUNK_SIZE = 1000
-        @DEPLOYMENTS_API_GROUP = "apps"
-        @@telemetryLastSentTime = DateTime.now.to_time.to_i
-  
-        
-        @deploymentsRunningTotal = 0
-  
-        @NodeName = OMS::Common.get_hostname
-        @ClusterId = KubernetesApiClient.getClusterId
-        @ClusterName = KubernetesApiClient.getClusterName
-      end
-  
-      config_param :run_interval, :time, :default => 60
-      config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG
-  
-      def configure(conf)
-        super
-      end
-  
-      def start
-        if @run_interval
-          @finished = false
-          @condition = ConditionVariable.new
-          @mutex = Mutex.new
-          @thread = Thread.new(&method(:run_periodic))
+  class Kube_Kubestate_Deployments_Input < Input
+    Plugin.register_input("kubestatedeployments", self)
+    @@istestvar = ENV["ISTEST"]
+    # telemetry - To keep telemetry cost reasonable, we keep track of the max deployments over a period of 15m
+    @@deploymentsCount = 0
+
+    def initialize
+      super
+      require "yajl/json_gem"
+      require "yajl"
+      require "date"
+      require "time"
+
+      require_relative "KubernetesApiClient"
+      require_relative "oms_common"
+      require_relative "omslog"
+      require_relative "ApplicationInsightsUtility"
+      require_relative "constants"
+
+      # refer to tomlparser-agent-config for defaults
+      # this is configurable via configmap
+      @DEPLOYMENTS_CHUNK_SIZE = 0
+
+      @DEPLOYMENTS_API_GROUP = "apps"
+      @@telemetryLastSentTime = DateTime.now.to_time.to_i
+
+      @deploymentsRunningTotal = 0
+
+      @NodeName = OMS::Common.get_hostname
+      @ClusterId = KubernetesApiClient.getClusterId
+      @ClusterName = KubernetesApiClient.getClusterName
+    end
+
+    config_param :run_interval, :time, :default => 60
+    config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG
+
+    def configure(conf)
+      super
+    end
+
+    def start
+      if @run_interval
+        if !ENV["DEPLOYMENTS_CHUNK_SIZE"].nil? && !ENV["DEPLOYMENTS_CHUNK_SIZE"].empty? && ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i > 0
+          @DEPLOYMENTS_CHUNK_SIZE = ENV["DEPLOYMENTS_CHUNK_SIZE"].to_i
+        else
+          # this shouldn't happen; setting the default here is just a safeguard
+          $log.warn("in_kubestate_deployments::start: setting to default value since got DEPLOYMENTS_CHUNK_SIZE nil or empty")
+          @DEPLOYMENTS_CHUNK_SIZE = 500
         end
+        $log.info("in_kubestate_deployments::start : DEPLOYMENTS_CHUNK_SIZE  @ #{@DEPLOYMENTS_CHUNK_SIZE}")
+
+        @finished = false
+        @condition = ConditionVariable.new
+        @mutex = Mutex.new
+        @thread = Thread.new(&method(:run_periodic))
       end
-  
-      def shutdown
-        if @run_interval
-          @mutex.synchronize {
-            @finished = true
-            @condition.signal
-          }
-          @thread.join
-        end
+    end
+
+    def shutdown
+      if @run_interval
+        @mutex.synchronize {
+          @finished = true
+          @condition.signal
+        }
+        @thread.join
       end
-  
-      def enumerate
-        begin
-          deploymentList = nil
-          currentTime = Time.now
-          batchTime = currentTime.utc.iso8601
-          
-          #set the running total for this batch to 0
-          @deploymentsRunningTotal = 0
-  
-          # Initializing continuation token to nil
-          continuationToken = nil
-          $log.info("in_kubestate_deployments::enumerate : Getting deployments from Kube API @ #{Time.now.utc.iso8601}")
-          continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}", api_group: @DEPLOYMENTS_API_GROUP)
-          $log.info("in_kubestate_deployments::enumerate : Done getting deployments from Kube API @ #{Time.now.utc.iso8601}")
+    end
+
+    def enumerate
+      begin
+        deploymentList = nil
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
+
+        #set the running total for this batch to 0
+        @deploymentsRunningTotal = 0
+
+        # Initializing continuation token to nil
+        continuationToken = nil
+        $log.info("in_kubestate_deployments::enumerate : Getting deployments from Kube API @ #{Time.now.utc.iso8601}")
+        continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}", api_group: @DEPLOYMENTS_API_GROUP)
+        $log.info("in_kubestate_deployments::enumerate : Done getting deployments from Kube API @ #{Time.now.utc.iso8601}")
+        if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?)
+          $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} from Kube API @ #{Time.now.utc.iso8601}")
+          parse_and_emit_records(deploymentList, batchTime)
+        else
+          $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList"
+        end
+
+        #If we receive a continuation token, make calls, process and flush data until we have processed all data
+        while (!continuationToken.nil? && !continuationToken.empty?)
+          continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @DEPLOYMENTS_API_GROUP)
           if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?)
+            $log.info("in_kubestate_deployments::enumerate : number of deployment items :#{deploymentList["items"].length} from Kube API @ #{Time.now.utc.iso8601}")
             parse_and_emit_records(deploymentList, batchTime)
           else
             $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList"
           end
-  
-          #If we receive a continuation token, make calls, process and flush data until we have processed all data
-          while (!continuationToken.nil? && !continuationToken.empty?)
-            continuationToken, deploymentList = KubernetesApiClient.getResourcesAndContinuationToken("deployments?limit=#{@DEPLOYMENTS_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @DEPLOYMENTS_API_GROUP)
-            if (!deploymentList.nil? && !deploymentList.empty? && deploymentList.key?("items") && !deploymentList["items"].nil? && !deploymentList["items"].empty?)
-              parse_and_emit_records(deploymentList, batchTime)
-            else
-              $log.warn "in_kubestate_deployments::enumerate:Received empty deploymentList"
-            end
+        end
+
+        # Setting this to nil so that we dont hold memory until GC kicks in
+        deploymentList = nil
+
+        $log.info("successfully emitted a total of #{@deploymentsRunningTotal} kube_state_deployment metrics")
+        # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0
+        if (@deploymentsRunningTotal > @@deploymentsCount)
+          @@deploymentsCount = @deploymentsRunningTotal
+        end
+        if (((DateTime.now.to_time.to_i - @@telemetryLastSentTime).abs) / 60) >= Constants::KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES
+          #send telemetry
+          $log.info "sending deployemt telemetry..."
+          ApplicationInsightsUtility.sendMetricTelemetry("MaxDeploymentCount", @@deploymentsCount, {})
+          #reset last sent value & time
+          @@deploymentsCount = 0
+          @@telemetryLastSentTime = DateTime.now.to_time.to_i
+        end
+      rescue => errorStr
+        $log.warn "in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}"
+        ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}")
+      end
+    end # end enumerate
+
+    def parse_and_emit_records(deployments, batchTime = Time.now.utc.iso8601) # batchTime is stamped on every metric item as CollectionTime
+      metricItems = []
+      insightsMetricsEventStream = MultiEventStream.new
+      begin
+        metricInfo = deployments
+        metricInfo["items"].each do |deployment|
+          deploymentName = deployment["metadata"]["name"]
+          deploymentNameSpace = deployment["metadata"]["namespace"]
+          deploymentCreatedTime = ""
+          if !deployment["metadata"]["creationTimestamp"].nil?
+            deploymentCreatedTime = deployment["metadata"]["creationTimestamp"]
+          end
+          deploymentStrategy = "RollingUpdate" #default when not specified as per spec
+          if !deployment["spec"]["strategy"].nil? && !deployment["spec"]["strategy"]["type"].nil?
+            deploymentStrategy = deployment["spec"]["strategy"]["type"]
           end
-  
-          # Setting this to nil so that we dont hold memory until GC kicks in
-          deploymentList = nil
-  
-          $log.info("successfully emitted a total of #{@deploymentsRunningTotal} kube_state_deployment metrics")
-          # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0
-          if (@deploymentsRunningTotal > @@deploymentsCount)
-            @@deploymentsCount = @deploymentsRunningTotal
+          deploymentSpecReplicas = 1 #default is 1 as per k8s spec
+          if !deployment["spec"]["replicas"].nil?
+            deploymentSpecReplicas = deployment["spec"]["replicas"]
           end
-          if (((DateTime.now.to_time.to_i - @@telemetryLastSentTime).abs)/60 ) >= Constants::KUBE_STATE_TELEMETRY_FLUSH_INTERVAL_IN_MINUTES
-            #send telemetry
-            $log.info "sending deployemt telemetry..."
-            ApplicationInsightsUtility.sendMetricTelemetry("MaxDeploymentCount", @@deploymentsCount, {})
-            #reset last sent value & time
-            @@deploymentsCount = 0
-            @@telemetryLastSentTime = DateTime.now.to_time.to_i
+          deploymentStatusReadyReplicas = 0
+          if !deployment["status"]["readyReplicas"].nil?
+            deploymentStatusReadyReplicas = deployment["status"]["readyReplicas"]
           end
-        rescue => errorStr
-          $log.warn "in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}"
-          ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::enumerate:Failed in enumerate: #{errorStr}")
+          deploymentStatusUpToDateReplicas = 0
+          if !deployment["status"]["updatedReplicas"].nil?
+            deploymentStatusUpToDateReplicas = deployment["status"]["updatedReplicas"]
+          end
+          deploymentStatusAvailableReplicas = 0
+          if !deployment["status"]["availableReplicas"].nil?
+            deploymentStatusAvailableReplicas = deployment["status"]["availableReplicas"]
+          end
+
+          metricItem = {}
+          metricItem["CollectionTime"] = batchTime
+          metricItem["Computer"] = @NodeName
+          metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE
+          metricItem["Value"] = deploymentStatusReadyReplicas
+          metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
+          metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE
+
+          metricTags = {}
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME] = deploymentName
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = deploymentNameSpace
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY] = deploymentStrategy
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = deploymentCreatedTime
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS] = deploymentSpecReplicas
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED] = deploymentStatusUpToDateReplicas
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE] = deploymentStatusAvailableReplicas
+
+          metricItem["Tags"] = metricTags
+
+          metricItems.push(metricItem)
         end
-      end # end enumerate
-  
-      def parse_and_emit_records(deployments, batchTime = Time.utc.iso8601)
-        metricItems = []
-        insightsMetricsEventStream = MultiEventStream.new
-        begin
-            metricInfo = deployments
-            metricInfo["items"].each do |deployment|
-                deploymentName = deployment["metadata"]["name"]
-                deploymentNameSpace = deployment["metadata"]["namespace"]
-                deploymentCreatedTime = ""
-                if !deployment["metadata"]["creationTimestamp"].nil?
-                    deploymentCreatedTime = deployment["metadata"]["creationTimestamp"]
-                end
-                deploymentStrategy = "RollingUpdate" #default when not specified as per spec
-                if !deployment["spec"]["strategy"].nil? && !deployment["spec"]["strategy"]["type"].nil?
-                    deploymentStrategy = deployment["spec"]["strategy"]["type"]
-                end
-                deploymentSpecReplicas = 1 #default is 1 as per k8s spec
-                if !deployment["spec"]["replicas"].nil?
-                    deploymentSpecReplicas = deployment["spec"]["replicas"]
-                end
-                deploymentStatusReadyReplicas = 0
-                if !deployment["status"]["readyReplicas"].nil?
-                    deploymentStatusReadyReplicas = deployment["status"]["readyReplicas"]
-                end
-                deploymentStatusUpToDateReplicas = 0
-                if !deployment["status"]["updatedReplicas"].nil?
-                    deploymentStatusUpToDateReplicas = deployment["status"]["updatedReplicas"]
-                end
-                deploymentStatusAvailableReplicas = 0
-                if !deployment["status"]["availableReplicas"].nil?
-                    deploymentStatusAvailableReplicas = deployment["status"]["availableReplicas"]
-                end
-                
-                metricItem = {}
-                metricItem["CollectionTime"] = batchTime
-                metricItem["Computer"] = @NodeName
-                metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_DEPLOYMENT_STATE
-                metricItem["Value"] = deploymentStatusReadyReplicas
-                metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
-                metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE
-
-                metricTags = {}
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_NAME] = deploymentName
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = deploymentNameSpace
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STRATEGY ] = deploymentStrategy
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = deploymentCreatedTime
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_SPEC_REPLICAS] = deploymentSpecReplicas
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_UPDATED] = deploymentStatusUpToDateReplicas
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_DEPLOYMENT_STATUS_REPLICAS_AVAILABLE] = deploymentStatusAvailableReplicas
-                
-
-                metricItem["Tags"] = metricTags
-
-                metricItems.push(metricItem)
-            end
-
-            time = Time.now.to_f
-            metricItems.each do |insightsMetricsRecord|
-                wrapper = {
-                  "DataType" => "INSIGHTS_METRICS_BLOB",
-                  "IPName" => "ContainerInsights",
-                  "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
-                }
-                insightsMetricsEventStream.add(time, wrapper) if wrapper
-            end
-    
-            router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
-            $log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics")
-            @deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length()
-            if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
-                $log.info("kubestatedeploymentsInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-            end
-        rescue => error
-            $log.warn("in_kubestate_deployments::parse_and_emit_records failed: #{error} ")
-            ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::parse_and_emit_records failed: #{error}")
+
+        time = Time.now.to_f
+        metricItems.each do |insightsMetricsRecord|
+          wrapper = {
+            "DataType" => "INSIGHTS_METRICS_BLOB",
+            "IPName" => "ContainerInsights",
+            "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
+          }
+          insightsMetricsEventStream.add(time, wrapper) if wrapper
+        end
+
+        router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+        $log.info("successfully emitted #{metricItems.length()} kube_state_deployment metrics")
+
+        @deploymentsRunningTotal = @deploymentsRunningTotal + metricItems.length()
+        if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
+          $log.info("kubestatedeploymentsInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
         end
-        
+      rescue => error
+        $log.warn("in_kubestate_deployments::parse_and_emit_records failed: #{error} ")
+        ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::parse_and_emit_records failed: #{error}")
       end
-  
-      def run_periodic
-        @mutex.lock
+    end
+
+    def run_periodic
+      @mutex.lock
+      done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
+      until done
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
-        @nextTimeToRun = Time.now
-        @waitTimeout = @run_interval
-        until done
-          @nextTimeToRun = @nextTimeToRun + @run_interval
-          @now = Time.now
-          if @nextTimeToRun <= @now
-            @waitTimeout = 1
-            @nextTimeToRun = @now
-          else
-            @waitTimeout = @nextTimeToRun - @now
-          end
-          @condition.wait(@mutex, @waitTimeout)
-          done = @finished
-          @mutex.unlock
-          if !done
-            begin
-              $log.info("in_kubestate_deployments::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
-              enumerate
-              $log.info("in_kubestate_deployments::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
-            rescue => errorStr
-              $log.warn "in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}"
-              ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}")
-            end
+        @mutex.unlock
+        if !done
+          begin
+            $log.info("in_kubestate_deployments::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
+            enumerate
+            $log.info("in_kubestate_deployments::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
+          rescue => errorStr
+            $log.warn "in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}"
+            ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_deployments::run_periodic: enumerate Failed to retrieve kube deployments: #{errorStr}")
           end
-          @mutex.lock
         end
-        @mutex.unlock
+        @mutex.lock
       end
+      @mutex.unlock
     end
-end
\ No newline at end of file
+  end
+end
diff --git a/source/plugins/ruby/in_kubestate_hpa.rb b/source/plugins/ruby/in_kubestate_hpa.rb
index 3ce63a75a..afecf8e3b 100644
--- a/source/plugins/ruby/in_kubestate_hpa.rb
+++ b/source/plugins/ruby/in_kubestate_hpa.rb
@@ -2,231 +2,236 @@
 # frozen_string_literal: true
 
 module Fluent
-    class Kube_Kubestate_HPA_Input < Input
-      Plugin.register_input("kubestatehpa", self)
-      @@istestvar = ENV["ISTEST"]
-      
-  
-      def initialize
-        super
-        require "yajl/json_gem"
-        require "yajl"
-        require "time"
-  
-        require_relative "KubernetesApiClient"
-        require_relative "oms_common"
-        require_relative "omslog"
-        require_relative "ApplicationInsightsUtility"
-        require_relative "constants"
-  
-        # roughly each HPA is 3k
-        # 2000 HPAs account to approximately 6-7MB
-        @HPA_CHUNK_SIZE = 2000
-        @HPA_API_GROUP = "autoscaling"
-  
-        # telemetry
-        @hpaCount = 0
-  
-        @NodeName = OMS::Common.get_hostname
-        @ClusterId = KubernetesApiClient.getClusterId
-        @ClusterName = KubernetesApiClient.getClusterName
-      end
-  
-      config_param :run_interval, :time, :default => 60
-      config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG
-  
-      def configure(conf)
-        super
-      end
-  
-      def start
-        if @run_interval
-          @finished = false
-          @condition = ConditionVariable.new
-          @mutex = Mutex.new
-          @thread = Thread.new(&method(:run_periodic))
+  class Kube_Kubestate_HPA_Input < Input
+    Plugin.register_input("kubestatehpa", self)
+    @@istestvar = ENV["ISTEST"]
+
+    def initialize
+      super
+      require "yajl/json_gem"
+      require "yajl"
+      require "time"
+
+      require_relative "KubernetesApiClient"
+      require_relative "oms_common"
+      require_relative "omslog"
+      require_relative "ApplicationInsightsUtility"
+      require_relative "constants"
+
+      # chunk size is resolved in start from the HPA_CHUNK_SIZE env var;
+      # it is configurable via configmap (refer to tomlparser-agent-config for defaults)
+      @HPA_CHUNK_SIZE = 0
+
+      @HPA_API_GROUP = "autoscaling"
+
+      # telemetry
+      @hpaCount = 0
+
+      @NodeName = OMS::Common.get_hostname
+      @ClusterId = KubernetesApiClient.getClusterId
+      @ClusterName = KubernetesApiClient.getClusterName
+    end
+
+    config_param :run_interval, :time, :default => 60
+    config_param :tag, :string, :default => Constants::INSIGHTSMETRICS_FLUENT_TAG
+
+    def configure(conf)
+      super
+    end
+
+    def start
+      if @run_interval
+        if !ENV["HPA_CHUNK_SIZE"].nil? && !ENV["HPA_CHUNK_SIZE"].empty? && ENV["HPA_CHUNK_SIZE"].to_i > 0
+          @HPA_CHUNK_SIZE = ENV["HPA_CHUNK_SIZE"].to_i
+        else
+          # this shouldn't happen; just setting the default here as a safeguard
+          $log.warn("in_kubestate_hpa::start: setting to default value since got HPA_CHUNK_SIZE nil or empty")
+          @HPA_CHUNK_SIZE = 2000
         end
+        $log.info("in_kubestate_hpa::start : HPA_CHUNK_SIZE  @ #{@HPA_CHUNK_SIZE}")
+
+        @finished = false
+        @condition = ConditionVariable.new
+        @mutex = Mutex.new
+        @thread = Thread.new(&method(:run_periodic))
       end
-  
-      def shutdown
-        if @run_interval
-          @mutex.synchronize {
-            @finished = true
-            @condition.signal
-          }
-          @thread.join
-        end
+    end
+
+    def shutdown
+      if @run_interval
+        @mutex.synchronize {
+          @finished = true
+          @condition.signal
+        }
+        @thread.join
       end
-  
-      def enumerate
-        begin
-          hpaList = nil
-          currentTime = Time.now
-          batchTime = currentTime.utc.iso8601
-          
-          @hpaCount = 0
-  
-          # Initializing continuation token to nil
-          continuationToken = nil
-          $log.info("in_kubestate_hpa::enumerate : Getting HPAs from Kube API @ #{Time.now.utc.iso8601}")
-          continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}", api_group: @HPA_API_GROUP)
-          $log.info("in_kubestate_hpa::enumerate : Done getting HPAs from Kube API @ #{Time.now.utc.iso8601}")
+    end
+
+    def enumerate
+      begin
+        hpaList = nil
+        currentTime = Time.now
+        batchTime = currentTime.utc.iso8601
+
+        @hpaCount = 0
+
+        # Initializing continuation token to nil
+        continuationToken = nil
+        $log.info("in_kubestate_hpa::enumerate : Getting HPAs from Kube API @ #{Time.now.utc.iso8601}")
+        continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}", api_group: @HPA_API_GROUP)
+        $log.info("in_kubestate_hpa::enumerate : Done getting HPAs from Kube API @ #{Time.now.utc.iso8601}")
+        if (!hpaList.nil? && !hpaList.empty? && hpaList.key?("items") && !hpaList["items"].nil? && !hpaList["items"].empty?)
+          parse_and_emit_records(hpaList, batchTime)
+        else
+          $log.warn "in_kubestate_hpa::enumerate:Received empty hpaList"
+        end
+
+        #If we receive a continuation token, make calls, process and flush data until we have processed all data
+        while (!continuationToken.nil? && !continuationToken.empty?)
+          continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @HPA_API_GROUP)
           if (!hpaList.nil? && !hpaList.empty? && hpaList.key?("items") && !hpaList["items"].nil? && !hpaList["items"].empty?)
             parse_and_emit_records(hpaList, batchTime)
           else
             $log.warn "in_kubestate_hpa::enumerate:Received empty hpaList"
           end
-  
-          #If we receive a continuation token, make calls, process and flush data until we have processed all data
-          while (!continuationToken.nil? && !continuationToken.empty?)
-            continuationToken, hpaList = KubernetesApiClient.getResourcesAndContinuationToken("horizontalpodautoscalers?limit=#{@HPA_CHUNK_SIZE}&continue=#{continuationToken}", api_group: @HPA_API_GROUP)
-            if (!hpaList.nil? && !hpaList.empty? && hpaList.key?("items") && !hpaList["items"].nil? && !hpaList["items"].empty?)
-              parse_and_emit_records(hpaList, batchTime)
-            else
-              $log.warn "in_kubestate_hpa::enumerate:Received empty hpaList"
+        end
+
+        # Setting this to nil so that we dont hold memory until GC kicks in
+        hpaList = nil
+
+        # Log the HPA count once all the processing is done, only if it is greater than 0 (the AppInsights metric below is disabled)
+        if (@hpaCount > 0)
+          # this will not be a useful telemetry, as hpa counts will not be huge, just log for now
+          $log.info("in_kubestate_hpa::hpaCount= #{@hpaCount}")
+          #ApplicationInsightsUtility.sendMetricTelemetry("HPACount", @hpaCount, {})
+        end
+      rescue => errorStr
+        $log.warn "in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}"
+        ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}")
+      end
+    end # end enumerate
+
+    def parse_and_emit_records(hpas, batchTime = Time.now.utc.iso8601)
+      metricItems = []
+      insightsMetricsEventStream = MultiEventStream.new
+      begin
+        metricInfo = hpas
+        metricInfo["items"].each do |hpa|
+          hpaName = hpa["metadata"]["name"]
+          hpaNameSpace = hpa["metadata"]["namespace"]
+          hpaCreatedTime = ""
+          if !hpa["metadata"]["creationTimestamp"].nil?
+            hpaCreatedTime = hpa["metadata"]["creationTimestamp"]
+          end
+          hpaSpecMinReplicas = 1 #default is 1 as per k8s spec
+          if !hpa["spec"]["minReplicas"].nil?
+            hpaSpecMinReplicas = hpa["spec"]["minReplicas"]
+          end
+          hpaSpecMaxReplicas = 0
+          if !hpa["spec"]["maxReplicas"].nil?
+            hpaSpecMaxReplicas = hpa["spec"]["maxReplicas"]
+          end
+          hpaSpecScaleTargetKind = ""
+          hpaSpecScaleTargetName = ""
+          if !hpa["spec"]["scaleTargetRef"].nil?
+            if !hpa["spec"]["scaleTargetRef"]["kind"].nil?
+              hpaSpecScaleTargetKind = hpa["spec"]["scaleTargetRef"]["kind"]
+            end
+            if !hpa["spec"]["scaleTargetRef"]["name"].nil?
+              hpaSpecScaleTargetName = hpa["spec"]["scaleTargetRef"]["name"]
             end
           end
-  
-          # Setting this to nil so that we dont hold memory until GC kicks in
-          hpaList = nil
-  
-          # Flush AppInsights telemetry once all the processing is done, only if the number of events flushed is greater than 0
-          if (@hpaCount > 0)
-            # this will not be a useful telemetry, as hpa counts will not be huge, just log for now
-            $log.info("in_kubestate_hpa::hpaCount= #{hpaCount}")
-            #ApplicationInsightsUtility.sendMetricTelemetry("HPACount", @hpaCount, {})
+          hpaStatusCurrentReplicas = 0
+          if !hpa["status"]["currentReplicas"].nil?
+            hpaStatusCurrentReplicas = hpa["status"]["currentReplicas"]
           end
-        rescue => errorStr
-          $log.warn "in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}"
-          ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::enumerate:Failed in enumerate: #{errorStr}")
+          hpaStatusDesiredReplicas = 0
+          if !hpa["status"]["desiredReplicas"].nil?
+            hpaStatusDesiredReplicas = hpa["status"]["desiredReplicas"]
+          end
+
+          hpaStatuslastScaleTime = ""
+          if !hpa["status"]["lastScaleTime"].nil?
+            hpaStatuslastScaleTime = hpa["status"]["lastScaleTime"]
+          end
+
+          metricItem = {}
+          metricItem["CollectionTime"] = batchTime
+          metricItem["Computer"] = @NodeName
+          metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE
+          metricItem["Value"] = hpaStatusCurrentReplicas
+          metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
+          metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE
+
+          metricTags = {}
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME] = hpaName
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = hpaNameSpace
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = hpaCreatedTime
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS] = hpaSpecMinReplicas
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS] = hpaSpecMaxReplicas
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND] = hpaSpecScaleTargetKind
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME] = hpaSpecScaleTargetName
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS] = hpaStatusDesiredReplicas
+          metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME] = hpaStatuslastScaleTime
+
+          metricItem["Tags"] = metricTags
+
+          metricItems.push(metricItem)
         end
-      end # end enumerate
-  
-      def parse_and_emit_records(hpas, batchTime = Time.utc.iso8601)
-        metricItems = []
-        insightsMetricsEventStream = MultiEventStream.new
-        begin
-            metricInfo = hpas
-            metricInfo["items"].each do |hpa|
-                hpaName = hpa["metadata"]["name"]
-                hpaNameSpace = hpa["metadata"]["namespace"]
-                hpaCreatedTime = ""
-                if !hpa["metadata"]["creationTimestamp"].nil?
-                    hpaCreatedTime = hpa["metadata"]["creationTimestamp"]
-                end
-                hpaSpecMinReplicas = 1 #default is 1 as per k8s spec
-                if !hpa["spec"]["minReplicas"].nil?
-                    hpaSpecMinReplicas = hpa["spec"]["minReplicas"]
-                end
-                hpaSpecMaxReplicas = 0
-                if !hpa["spec"]["maxReplicas"].nil?
-                    hpaSpecMaxReplicas = hpa["spec"]["maxReplicas"]
-                end
-                hpaSpecScaleTargetKind = ""
-                hpaSpecScaleTargetName = ""
-                if !hpa["spec"]["scaleTargetRef"].nil?
-                    if !hpa["spec"]["scaleTargetRef"]["kind"].nil?
-                        hpaSpecScaleTargetKind = hpa["spec"]["scaleTargetRef"]["kind"]
-                    end
-                    if !hpa["spec"]["scaleTargetRef"]["name"].nil?
-                        hpaSpecScaleTargetName = hpa["spec"]["scaleTargetRef"]["name"]
-                    end
-
-                end
-                hpaStatusCurrentReplicas = 0
-                if !hpa["status"]["currentReplicas"].nil?
-                    hpaStatusCurrentReplicas = hpa["status"]["currentReplicas"]
-                end
-                hpaStatusDesiredReplicas = 0
-                if !hpa["status"]["desiredReplicas"].nil?
-                    hpaStatusDesiredReplicas = hpa["status"]["desiredReplicas"]
-                end
-                
-                hpaStatuslastScaleTime = ""
-                if !hpa["status"]["lastScaleTime"].nil?
-                    hpaStatuslastScaleTime = hpa["status"]["lastScaleTime"]
-                end
-                
-                
-                metricItem = {}
-                metricItem["CollectionTime"] = batchTime
-                metricItem["Computer"] = @NodeName
-                metricItem["Name"] = Constants::INSIGHTSMETRICS_METRIC_NAME_KUBE_STATE_HPA_STATE
-                metricItem["Value"] = hpaStatusCurrentReplicas
-                metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
-                metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_KUBESTATE_NAMESPACE
-
-                metricTags = {}
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = @ClusterId
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = @ClusterName
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_NAME] = hpaName
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = hpaNameSpace
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_CREATIONTIME] = hpaCreatedTime
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MIN_REPLICAS] = hpaSpecMinReplicas
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_MAX_REPLICAS] = hpaSpecMaxReplicas
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_KIND] = hpaSpecScaleTargetKind
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_SPEC_SCALE_TARGET_NAME] = hpaSpecScaleTargetName
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_DESIRED_REPLICAS] = hpaStatusDesiredReplicas
-                metricTags[Constants::INSIGHTSMETRICS_TAGS_KUBE_STATE_HPA_STATUS_LAST_SCALE_TIME] = hpaStatuslastScaleTime
-                
-
-                metricItem["Tags"] = metricTags
-
-                metricItems.push(metricItem)
-            end
 
-            time = Time.now.to_f
-            metricItems.each do |insightsMetricsRecord|
-                wrapper = {
-                  "DataType" => "INSIGHTS_METRICS_BLOB",
-                  "IPName" => "ContainerInsights",
-                  "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
-                }
-                insightsMetricsEventStream.add(time, wrapper) if wrapper
-            end
-    
-            router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
-            $log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics")
-            if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
-                $log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
-            end
-        rescue => error
-            $log.warn("in_kubestate_hpa::parse_and_emit_records failed: #{error} ")
-            ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::parse_and_emit_records failed: #{error}")
+        time = Time.now.to_f
+        metricItems.each do |insightsMetricsRecord|
+          wrapper = {
+            "DataType" => "INSIGHTS_METRICS_BLOB",
+            "IPName" => "ContainerInsights",
+            "DataItems" => [insightsMetricsRecord.each { |k, v| insightsMetricsRecord[k] = v }],
+          }
+          insightsMetricsEventStream.add(time, wrapper) if wrapper
+        end
+
+        router.emit_stream(Constants::INSIGHTSMETRICS_FLUENT_TAG, insightsMetricsEventStream) if insightsMetricsEventStream
+        $log.info("successfully emitted #{metricItems.length()} kube_state_hpa metrics")
+        if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
+          $log.info("kubestatehpaInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
         end
-        
+      rescue => error
+        $log.warn("in_kubestate_hpa::parse_and_emit_records failed: #{error} ")
+        ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::parse_and_emit_records failed: #{error}")
       end
-  
-      def run_periodic
-        @mutex.lock
+    end
+
+    def run_periodic
+      @mutex.lock
+      done = @finished
+      @nextTimeToRun = Time.now
+      @waitTimeout = @run_interval
+      until done
+        @nextTimeToRun = @nextTimeToRun + @run_interval
+        @now = Time.now
+        if @nextTimeToRun <= @now
+          @waitTimeout = 1
+          @nextTimeToRun = @now
+        else
+          @waitTimeout = @nextTimeToRun - @now
+        end
+        @condition.wait(@mutex, @waitTimeout)
         done = @finished
-        @nextTimeToRun = Time.now
-        @waitTimeout = @run_interval
-        until done
-          @nextTimeToRun = @nextTimeToRun + @run_interval
-          @now = Time.now
-          if @nextTimeToRun <= @now
-            @waitTimeout = 1
-            @nextTimeToRun = @now
-          else
-            @waitTimeout = @nextTimeToRun - @now
-          end
-          @condition.wait(@mutex, @waitTimeout)
-          done = @finished
-          @mutex.unlock
-          if !done
-            begin
-              $log.info("in_kubestate_hpa::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
-              enumerate
-              $log.info("in_kubestate_hpa::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
-            rescue => errorStr
-              $log.warn "in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}"
-              ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}")
-            end
+        @mutex.unlock
+        if !done
+          begin
+            $log.info("in_kubestate_hpa::run_periodic.enumerate.start @ #{Time.now.utc.iso8601}")
+            enumerate
+            $log.info("in_kubestate_hpa::run_periodic.enumerate.end @ #{Time.now.utc.iso8601}")
+          rescue => errorStr
+            $log.warn "in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}"
+            ApplicationInsightsUtility.sendExceptionTelemetry("in_kubestate_hpa::run_periodic: enumerate Failed to retrieve kube hpas: #{errorStr}")
           end
-          @mutex.lock
         end
-        @mutex.unlock
+        @mutex.lock
       end
+      @mutex.unlock
     end
-end
\ No newline at end of file
+  end
+end

From 9cb058c850cbfd8ed88910920cf3055b8066061b Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Fri, 18 Dec 2020 14:24:37 -0800
Subject: [PATCH 50/60] Gangams/enable arc onboarding to ff (#478)

* wip

* updates

* trigger login if the ctx cloud not same as specified cloud

* add missed commit
---
 .../onboarding/managed/disable-monitoring.ps1 | 34 ++++++++++++---
 .../onboarding/managed/disable-monitoring.sh  | 17 ++++++++
 .../onboarding/managed/enable-monitoring.ps1  | 43 ++++++++++++++++---
 .../onboarding/managed/enable-monitoring.sh   | 38 +++++++++++++---
 .../onboarding/managed/upgrade-monitoring.sh  | 19 +++++++-
 5 files changed, 130 insertions(+), 21 deletions(-)

diff --git a/scripts/onboarding/managed/disable-monitoring.ps1 b/scripts/onboarding/managed/disable-monitoring.ps1
index 1c011bfff..bcd135dba 100644
--- a/scripts/onboarding/managed/disable-monitoring.ps1
+++ b/scripts/onboarding/managed/disable-monitoring.ps1
@@ -15,6 +15,8 @@
         tenantId of the service principal which will be used for the azure login
     .PARAMETER kubeContext (optional)
         kube-context of the k8 cluster to install Azure Monitor for containers HELM chart
+    .PARAMETER azureCloudName (optional)
+        Name of the Azure cloud. Supported values are AzureCloud and AzureUSGovernment; defaults to AzureCloud
 
     Pre-requisites:
       -  Azure Managed cluster Resource Id
@@ -34,7 +36,9 @@ param(
     [Parameter(mandatory = $false)]
     [string]$tenantId,
     [Parameter(mandatory = $false)]
-    [string]$kubeContext
+    [string]$kubeContext,
+    [Parameter(mandatory = $false)]
+    [string]$azureCloudName
 )
 
 $helmChartReleaseName = "azmon-containers-release-1"
@@ -46,6 +50,21 @@ $isAksCluster =  $false
 $isAroV4Cluster = $false
 $isUsingServicePrincipal = $false
 
+if ([string]::IsNullOrEmpty($azureCloudName) -eq $true) {
+    Write-Host("Azure cloud name parameter not passed in so using default cloud as AzureCloud")
+    $azureCloudName = "AzureCloud"
+} else {
+    if(($azureCloudName.ToLower() -eq "azurecloud" ) -eq $true) {
+        Write-Host("Specified Azure Cloud name is : $azureCloudName")
+    } elseif (($azureCloudName.ToLower() -eq "azureusgovernment" ) -eq $true) {
+        Write-Host("Specified Azure Cloud name is : $azureCloudName")
+    } else {
+        Write-Host("Specified Azure Cloud name is : $azureCloudName")
+        Write-Host("Only supported Azure clouds are : AzureCloud and AzureUSGovernment")
+        exit
+    }
+}
+
 # checks the required Powershell modules exist and if not exists, request the user permission to install
 $azAccountModule = Get-Module -ListAvailable -Name Az.Accounts
 $azResourcesModule = Get-Module -ListAvailable -Name Az.Resources
@@ -226,14 +245,19 @@ Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -Foregr
 if ($isUsingServicePrincipal) {
     $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
     $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
-    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId -Environment $azureCloudName
 }
 
 try {
     Write-Host("")
     Write-Host("Trying to get the current Az login context...")
     $account = Get-AzContext -ErrorAction Stop
-    Write-Host("Successfully fetched current AzContext context...") -ForegroundColor Green
+    $ctxCloud = $account.Environment.Name
+    if(($azureCloudName.ToLower() -eq $ctxCloud.ToLower() ) -eq $false) {
+        Write-Host("Specified azure cloud name is not same as current context cloud hence setting account to null to retrigger the login" ) -ForegroundColor Green
+        $account = $null
+    }
+    Write-Host("Successfully fetched current AzContext context and azure cloud name: $azureCloudName" ) -ForegroundColor Green
     Write-Host("")
 }
 catch {
@@ -249,10 +273,10 @@ if ($null -eq $account.Account) {
         if ($isUsingServicePrincipal) {
             $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
             $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId,$spSecret
-            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId -Environment $azureCloudName
         } else {
            Write-Host("Please login...")
-          Connect-AzAccount -subscriptionid $clusterSubscriptionId
+          Connect-AzAccount -subscriptionid $clusterSubscriptionId -Environment $azureCloudName
         }
     }
     catch {
diff --git a/scripts/onboarding/managed/disable-monitoring.sh b/scripts/onboarding/managed/disable-monitoring.sh
index c11426f30..d43a79f51 100644
--- a/scripts/onboarding/managed/disable-monitoring.sh
+++ b/scripts/onboarding/managed/disable-monitoring.sh
@@ -280,10 +280,27 @@ done
 
 }
 
+validate_and_configure_supported_cloud() {
+  echo "get active azure cloud name configured to azure cli"
+  azureCloudName=$(az cloud show --query name -o tsv | tr "[:upper:]" "[:lower:]")
+  echo "active azure cloud name configured to azure cli: ${azureCloudName}"
+  if [ "$isArcK8sCluster" = true ]; then
+    if [ "$azureCloudName" != "azurecloud" -a  "$azureCloudName" != "azureusgovernment" ]; then
+      echo "-e only supported clouds are AzureCloud and AzureUSGovernment for Azure Arc enabled Kubernetes cluster type"
+      exit 1
+    fi
+  else
+    # For ARO v4, only supported cloud is public so just configure to public to keep the existing behavior
+    configure_to_public_cloud
+  fi
+}
 
 # parse args
 parse_args $@
 
+# validate and configure azure cloud
+validate_and_configure_supported_cloud
+
 # parse cluster resource id
 clusterSubscriptionId="$(echo $clusterResourceId | cut -d'/' -f3 | tr "[:upper:]" "[:lower:]")"
 clusterResourceGroup="$(echo $clusterResourceId | cut -d'/' -f5)"
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index b052f22c5..7b128b112 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -22,6 +22,8 @@
     .PARAMETER proxyEndpoint (optional)
         Provide Proxy endpoint if you have K8s cluster behind the proxy and would like to route Azure Monitor for containers outbound traffic via proxy.
         Format of the proxy endpoint should be http(s://<user>:<password>@<proxyhost>:<port>
+    .PARAMETER azureCloudName (optional)
+        Name of the Azure cloud. Supported values are AzureCloud and AzureUSGovernment; defaults to AzureCloud
 
      Pre-requisites:
       -  Azure Managed cluster Resource Id
@@ -46,7 +48,9 @@ param(
     [Parameter(mandatory = $false)]
     [string]$workspaceResourceId,
     [Parameter(mandatory = $false)]
-    [string]$proxyEndpoint
+    [string]$proxyEndpoint,
+    [Parameter(mandatory = $false)]
+    [string]$azureCloudName
 )
 
 $solutionTemplateUri = "https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_dev/scripts/onboarding/templates/azuremonitor-containerSolution.json"
@@ -63,6 +67,24 @@ $mcr = "mcr.microsoft.com"
 $mcrChartVersion = "2.7.9"
 $mcrChartRepoPath = "azuremonitor/containerinsights/preview/azuremonitor-containers"
 $helmLocalRepoName = "."
+$omsAgentDomainName="opinsights.azure.com"
+
+if ([string]::IsNullOrEmpty($azureCloudName) -eq $true) {
+    Write-Host("Azure cloud name parameter not passed in so using default cloud as AzureCloud")
+    $azureCloudName = "AzureCloud"
+} else {
+    if(($azureCloudName.ToLower() -eq "azurecloud" ) -eq $true) {
+        Write-Host("Specified Azure Cloud name is : $azureCloudName")
+        $omsAgentDomainName="opinsights.azure.com"
+    } elseif (($azureCloudName.ToLower() -eq "azureusgovernment" ) -eq $true) {
+        Write-Host("Specified Azure Cloud name is : $azureCloudName")
+        $omsAgentDomainName="opinsights.azure.us"
+    } else {
+        Write-Host("Specified Azure Cloud name is : $azureCloudName")
+        Write-Host("Only supported azure clouds are : AzureCloud and AzureUSGovernment")
+        exit
+    }
+}
 
 # checks the required Powershell modules exist and if not exists, request the user permission to install
 $azAccountModule = Get-Module -ListAvailable -Name Az.Accounts
@@ -244,14 +266,19 @@ Write-Host("Cluster SubscriptionId : '" + $clusterSubscriptionId + "' ") -Foregr
 if ($isUsingServicePrincipal) {
     $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
     $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId, $spSecret
-    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+    Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId -Environment $azureCloudName
 }
 
 try {
     Write-Host("")
     Write-Host("Trying to get the current Az login context...")
     $account = Get-AzContext -ErrorAction Stop
-    Write-Host("Successfully fetched current AzContext context...") -ForegroundColor Green
+    $ctxCloud = $account.Environment.Name
+    if(($azureCloudName.ToLower() -eq $ctxCloud.ToLower() ) -eq $false) {
+        Write-Host("Specified azure cloud name is not same as current context cloud hence setting account to null to retrigger the login" ) -ForegroundColor Green
+        $account = $null
+    }
+    Write-Host("Successfully fetched current AzContext context and azure cloud name: $azureCloudName" ) -ForegroundColor Green
     Write-Host("")
 }
 catch {
@@ -266,11 +293,12 @@ if ($null -eq $account.Account) {
         if ($isUsingServicePrincipal) {
             $spSecret = ConvertTo-SecureString -String $servicePrincipalClientSecret -AsPlainText -Force
             $spCreds = New-Object -TypeName "System.Management.Automation.PSCredential" -ArgumentList $servicePrincipalClientId, $spSecret
-            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId
+
+            Connect-AzAccount -ServicePrincipal -Credential $spCreds -Tenant $tenantId -Subscription $clusterSubscriptionId -Environment $azureCloudName
         }
         else {
             Write-Host("Please login...")
-            Connect-AzAccount -subscriptionid $clusterSubscriptionId
+            Connect-AzAccount -subscriptionid $clusterSubscriptionId -Environment $azureCloudName
         }
     }
     catch {
@@ -380,7 +408,8 @@ if ([string]::IsNullOrEmpty($workspaceResourceId)) {
         "westeurope"         = "westeurope" ;
         "westindia"          = "centralindia" ;
         "westus"             = "westus" ;
-        "westus2"            = "westus2"
+        "westus2"            = "westus2";
+        "usgovvirginia"      = "usgovvirginia"
     }
 
     $workspaceRegionCode = "EUS"
@@ -531,7 +560,7 @@ try {
 
     Write-Host("helmChartRepoPath is : ${helmChartRepoPath}")
 
-    $helmParameters = "omsagent.secret.wsid=$workspaceGUID,omsagent.secret.key=$workspacePrimarySharedKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion"
+    $helmParameters = "omsagent.domain=$omsAgentDomainName,omsagent.secret.wsid=$workspaceGUID,omsagent.secret.key=$workspacePrimarySharedKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion"
     if ([string]::IsNullOrEmpty($proxyEndpoint) -eq $false) {
         Write-Host("using proxy endpoint since its provided")
         $helmParameters = $helmParameters + ",omsagent.proxy=$proxyEndpoint"
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index bb6974258..85428aff7 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -38,8 +38,10 @@
 set -e
 set -o pipefail
 
-# default to public cloud since only supported cloud is azure public clod
+# default to public cloud since only supported cloud is azure public cloud
 defaultAzureCloud="AzureCloud"
+# default domain will be for public cloud
+omsAgentDomainName="opinsights.azure.com"
 
 # released chart version in mcr
 mcrChartVersion="2.7.9"
@@ -307,6 +309,25 @@ parse_args() {
 
 }
 
+validate_and_configure_supported_cloud() {
+  echo "get active azure cloud name configured to azure cli"
+  azureCloudName=$(az cloud show --query name -o tsv | tr "[:upper:]" "[:lower:]")
+  echo "active azure cloud name configured to azure cli: ${azureCloudName}"
+  if [ "$isArcK8sCluster" = true ]; then
+    if [ "$azureCloudName" != "azurecloud" -a  "$azureCloudName" != "azureusgovernment" ]; then
+      echo "-e only supported clouds are AzureCloud and AzureUSGovernment for Azure Arc enabled Kubernetes cluster type"
+      exit 1
+    fi
+    if [ "$azureCloudName" = "azureusgovernment" ]; then
+      echo "setting omsagent domain as opinsights.azure.us since the azure cloud is azureusgovernment "
+      omsAgentDomainName="opinsights.azure.us"
+    fi
+  else
+    # For ARO v4, only supported cloud is public so just configure to public to keep the existing behavior
+    configure_to_public_cloud
+  fi
+}
+
 configure_to_public_cloud() {
   echo "Set AzureCloud as active cloud for az cli"
   az cloud set -n $defaultAzureCloud
@@ -398,8 +419,10 @@ create_default_log_analytics_workspace() {
     [westindia]=centralindia
     [westus]=westus
     [westus2]=westus2
+    [usgovvirginia]=usgovvirginia
   )
 
+  echo "cluster Region:"$clusterRegion
   if [ -n "${AzureCloudRegionToOmsRegionMap[$clusterRegion]}" ]; then
     workspaceRegion=${AzureCloudRegionToOmsRegionMap[$clusterRegion]}
   fi
@@ -433,6 +456,7 @@ create_default_log_analytics_workspace() {
 
   workspaceResourceId=$(az resource show -g $workspaceResourceGroup -n $workspaceName --resource-type $workspaceResourceProvider --query id)
   workspaceResourceId=$(echo $workspaceResourceId | tr -d '"')
+  echo "workspace resource Id: ${workspaceResourceId}"
 }
 
 add_container_insights_solution() {
@@ -504,18 +528,18 @@ install_helm_chart() {
     echo "using proxy endpoint since proxy configuration passed in"
     if [ -z "$kubeconfigContext" ]; then
       echo "using current kube-context since --kube-context/-k parameter not passed in"
-      helm upgrade --install $releaseName --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath
+      helm upgrade --install $releaseName --set omsagent.domain=$omsAgentDomainName,omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath
     else
       echo "using --kube-context:${kubeconfigContext} since passed in"
-      helm upgrade --install $releaseName --set omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath --kube-context ${kubeconfigContext}
+      helm upgrade --install $releaseName --set omsagent.domain=$omsAgentDomainName,omsagent.proxy=$proxyEndpoint,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath --kube-context ${kubeconfigContext}
     fi
   else
     if [ -z "$kubeconfigContext" ]; then
       echo "using current kube-context since --kube-context/-k parameter not passed in"
-      helm upgrade --install $releaseName --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath
+      helm upgrade --install $releaseName --set omsagent.domain=$omsAgentDomainName,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath
     else
       echo "using --kube-context:${kubeconfigContext} since passed in"
-      helm upgrade --install $releaseName --set omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath --kube-context ${kubeconfigContext}
+      helm upgrade --install $releaseName --set omsagent.domain=$omsAgentDomainName,omsagent.secret.wsid=$workspaceGuid,omsagent.secret.key=$workspaceKey,omsagent.env.clusterId=$clusterResourceId,omsagent.env.clusterRegion=$clusterRegion $helmChartRepoPath --kube-context ${kubeconfigContext}
     fi
   fi
 
@@ -560,8 +584,8 @@ enable_aks_monitoring_addon() {
 # parse and validate args
 parse_args $@
 
-# configure azure cli for public cloud
-configure_to_public_cloud
+# validate and configure azure cli for cloud
+validate_and_configure_supported_cloud
 
 # parse cluster resource id
 clusterSubscriptionId="$(echo $clusterResourceId | cut -d'/' -f3 | tr "[:upper:]" "[:lower:]")"
diff --git a/scripts/onboarding/managed/upgrade-monitoring.sh b/scripts/onboarding/managed/upgrade-monitoring.sh
index 11ecf6819..847bf84ea 100644
--- a/scripts/onboarding/managed/upgrade-monitoring.sh
+++ b/scripts/onboarding/managed/upgrade-monitoring.sh
@@ -281,11 +281,26 @@ set_azure_subscription() {
   echo "successfully configured subscription id: ${subscriptionId} as current subscription for the azure cli"
 }
 
+validate_and_configure_supported_cloud() {
+  echo "get active azure cloud name configured to azure cli"
+  azureCloudName=$(az cloud show --query name -o tsv | tr "[:upper:]" "[:lower:]")
+  echo "active azure cloud name configured to azure cli: ${azureCloudName}"
+  if [ "$isArcK8sCluster" = true ]; then
+    if [ "$azureCloudName" != "azurecloud" -a  "$azureCloudName" != "azureusgovernment" ]; then
+      echo "-e only supported clouds are AzureCloud and AzureUSGovernment for Azure Arc enabled Kubernetes cluster type"
+      exit 1
+    fi
+  else
+    # For ARO v4, only supported cloud is public so just configure to public to keep the existing behavior
+    configure_to_public_cloud
+  fi
+}
+
 # parse and validate args
 parse_args $@
 
-# configure azure cli for public cloud
-configure_to_public_cloud
+# configure azure cli for cloud
+validate_and_configure_supported_cloud
 
 # parse cluster resource id
 clusterSubscriptionId="$(echo $clusterResourceId | cut -d'/' -f3 | tr "[:upper:]" "[:lower:]")"

From ef9d726c7053fba0254fc897aff124e5a5a2be34 Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Mon, 4 Jan 2021 10:43:44 -0800
Subject: [PATCH 51/60] Convert PV type dictionary to json for telemetry so it
 shows up in logs (#480)

---
 source/plugins/ruby/in_kube_pvinventory.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/plugins/ruby/in_kube_pvinventory.rb b/source/plugins/ruby/in_kube_pvinventory.rb
index b0e09c85b..861b3a8e1 100644
--- a/source/plugins/ruby/in_kube_pvinventory.rb
+++ b/source/plugins/ruby/in_kube_pvinventory.rb
@@ -90,7 +90,7 @@ def enumerate
         # Flush AppInsights telemetry once all the processing is done
         if telemetryFlush == true
           telemetryProperties = {}
-          telemetryProperties["CountsOfPVTypes"] = @pvTypeToCountHash
+          telemetryProperties["CountsOfPVTypes"] = @pvTypeToCountHash.to_json
           ApplicationInsightsUtility.sendCustomEvent(Constants::PV_INVENTORY_HEART_BEAT_EVENT, telemetryProperties)
           @@pvTelemetryTimeTracker = DateTime.now.to_time.to_i
         end

From 97bdb94ad95234202ec2eca172cf419b5cee82d5 Mon Sep 17 00:00:00 2001
From: Vishwanath <visnara@microsoft.com>
Date: Wed, 6 Jan 2021 09:59:49 -0800
Subject: [PATCH 52/60] fix 2 windows tasks - 1) Don't log to termination log 2)
 enable ADX route for containerlogs in windows (for O365) (#482)

---
 build/common/installer/scripts/tomlparser.rb  |  2 +-
 .../installer/scripts/livenessprobe.cmd       | 24 +++++++------------
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/build/common/installer/scripts/tomlparser.rb b/build/common/installer/scripts/tomlparser.rb
index 7235ee0c3..1d33da124 100644
--- a/build/common/installer/scripts/tomlparser.rb
+++ b/build/common/installer/scripts/tomlparser.rb
@@ -244,7 +244,7 @@ def get_command_windows(env_variable_name, env_variable_value)
     file.write(commands)
     commands = get_command_windows('AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS', @collectAllKubeEvents)
     file.write(commands)
-    commands = get_command_windows('AZMON_CONTAINER_LOGS_ROUTE', @containerLogsRoute)
+    commands = get_command_windows('AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE', @containerLogsRoute)
     file.write(commands)
 
     # Close file after writing all environment variables
diff --git a/build/windows/installer/scripts/livenessprobe.cmd b/build/windows/installer/scripts/livenessprobe.cmd
index 06d577f31..19d0b69d7 100644
--- a/build/windows/installer/scripts/livenessprobe.cmd
+++ b/build/windows/installer/scripts/livenessprobe.cmd
@@ -1,40 +1,32 @@
-echo "Checking if fluent-bit is running"
+REM "Checking if fluent-bit is running"
 
 tasklist /fi "imagename eq fluent-bit.exe" /fo "table"  | findstr fluent-bit
 
 IF ERRORLEVEL 1 (
-    echo "Fluent-Bit is not running" > /dev/termination-log
+    echo "Fluent-Bit is not running"
     exit /b 1
-) ELSE (
-    echo "Fluent-Bit is running"
 )
 
-echo "Checking if config map has been updated since agent start"
+REM "Checking if config map has been updated since agent start"
 
 IF EXIST C:\etc\omsagentwindows\filesystemwatcher.txt (
-    echo "Config Map Updated since agent started" > /dev/termination-log
+    echo "Config Map Updated since agent started"
     exit /b  1
-) ELSE (
-    echo "Config Map not Updated since agent start"
 )
 
-echo "Checking if certificate needs to be renewed (aka agent restart required)"
+REM "Checking if certificate needs to be renewed (aka agent restart required)"
 
 IF EXIST C:\etc\omsagentwindows\renewcertificate.txt (
-    echo "Certificate needs to be renewed" > /dev/termination-log
+    echo "Certificate needs to be renewed"
     exit /b  1
-) ELSE (
-    echo "Certificate does NOT need to be renewd"
 )
 
-echo "Checking if fluentd service is running"
+REM "Checking if fluentd service is running"
 sc query fluentdwinaks | findstr /i STATE | findstr RUNNING
 
 IF ERRORLEVEL 1 (
-    echo "Fluentd Service is NOT Running" > /dev/termination-log
+    echo "Fluentd Service is NOT Running"
     exit /b  1
-) ELSE (
-    echo "Fluentd Service is Running"
 )
 
 exit /b 0

From 94237beba5671904945a676d156c609118c0b2d7 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Wed, 6 Jan 2021 13:58:22 -0800
Subject: [PATCH 53/60] fix ci envvar collection in large pods (#483)

---
 .../ruby/kubernetes_container_inventory.rb    | 43 +++++++++++++------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/source/plugins/ruby/kubernetes_container_inventory.rb b/source/plugins/ruby/kubernetes_container_inventory.rb
index 4fe728579..ba6a9af42 100644
--- a/source/plugins/ruby/kubernetes_container_inventory.rb
+++ b/source/plugins/ruby/kubernetes_container_inventory.rb
@@ -193,25 +193,41 @@ def obtainContainerEnvironmentVars(containerId)
       $log.info("KubernetesContainerInventory::obtainContainerEnvironmentVars @ #{Time.now.utc.iso8601}")
       envValueString = ""
       begin
-        unless @@containerCGroupCache.has_key?(containerId)
+        isCGroupPidFetchRequired = false 
+        if !@@containerCGroupCache.has_key?(containerId)
+          isCGroupPidFetchRequired = true 
+        else
+          cGroupPid = @@containerCGroupCache[containerId]
+          if cGroupPid.nil? || cGroupPid.empty?            
+            isCGroupPidFetchRequired = true
+            @@containerCGroupCache.delete(containerId)
+          elsif !File.exist?("/hostfs/proc/#{cGroupPid}/environ")              
+            isCGroupPidFetchRequired = true
+            @@containerCGroupCache.delete(containerId)                       
+          end        
+        end
+
+        if isCGroupPidFetchRequired
           $log.info("KubernetesContainerInventory::obtainContainerEnvironmentVars fetching cGroup parent pid @ #{Time.now.utc.iso8601} for containerId: #{containerId}")
           Dir["/hostfs/proc/*/cgroup"].each do |filename|
             begin
-              if File.file?(filename) && File.foreach(filename).grep(/#{containerId}/).any?
+              if File.file?(filename) && File.exist?(filename) && File.foreach(filename).grep(/#{containerId}/).any?
                 # file full path is /hostfs/proc/<cGroupPid>/cgroup
-                cGroupPid = filename.split("/")[3]
-                if @@containerCGroupCache.has_key?(containerId)
-                  tempCGroupPid = @@containerCGroupCache[containerId]
-                  if tempCGroupPid > cGroupPid
+                cGroupPid = filename.split("/")[3]  
+                if is_number?(cGroupPid)                              
+                  if @@containerCGroupCache.has_key?(containerId)
+                    tempCGroupPid = @@containerCGroupCache[containerId]                  
+                    if tempCGroupPid.to_i > cGroupPid.to_i
+                      @@containerCGroupCache[containerId] = cGroupPid
+                    end
+                  else
                     @@containerCGroupCache[containerId] = cGroupPid
-                  end
-                else
-                  @@containerCGroupCache[containerId] = cGroupPid
+                  end                        
                 end
               end
-            rescue SystemCallError # ignore Error::ENOENT,Errno::ESRCH which is expected if any of the container gone while we read
-            end
-          end
+            rescue SystemCallError # ignore Error::ENOENT,Errno::ESRCH which is expected if any of the container gone while we read              
+            end          
+          end        
         end
         cGroupPid = @@containerCGroupCache[containerId]
         if !cGroupPid.nil? && !cGroupPid.empty?
@@ -341,5 +357,8 @@ def deleteCGroupCacheEntryForDeletedContainer(containerId)
         ApplicationInsightsUtility.sendExceptionTelemetry(error)
       end
     end
+    def is_number?(value)
+      true if Integer(value) rescue false
+    end
   end
 end

From aacd496eeba6350ec0d028334813df7edc806a5e Mon Sep 17 00:00:00 2001
From: Grace Wehner <grace.wehner@microsoft.com>
Date: Thu, 7 Jan 2021 13:39:17 -0800
Subject: [PATCH 54/60] grwehner/jan agent tasks (#481)

- Windows agent fix to use log filtering settings in config map.
- Error handling for kubelet_utils get_node_capacity in case /metrics/cadvisor endpoint fails.
- Remove env variable for workspace key for windows agent
---
 build/common/installer/scripts/tomlparser.rb         |  2 +-
 .../installer/certificategenerator/Program.cs        |  8 +++-----
 build/windows/installer/conf/fluent.conf             | 12 ++++++++++--
 kubernetes/windows/main.ps1                          | 12 ++----------
 source/plugins/ruby/filter_cadvisor2mdm.rb           | 12 ++++++++++--
 5 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/build/common/installer/scripts/tomlparser.rb b/build/common/installer/scripts/tomlparser.rb
index 1d33da124..fe26f639e 100644
--- a/build/common/installer/scripts/tomlparser.rb
+++ b/build/common/installer/scripts/tomlparser.rb
@@ -228,7 +228,7 @@ def get_command_windows(env_variable_name, env_variable_value)
     file.write(commands)
     commands = get_command_windows('AZMON_LOG_TAIL_PATH', @logTailPath)
     file.write(commands)
-    commands = get_command_windows('AZMON_LOG_EXCLUSION_REGEX_PATTERN', @stdoutExcludeNamespaces)
+    commands = get_command_windows('AZMON_LOG_EXCLUSION_REGEX_PATTERN', @logExclusionRegexPattern)
     file.write(commands)
     commands = get_command_windows('AZMON_STDOUT_EXCLUDED_NAMESPACES', @stdoutExcludeNamespaces)
     file.write(commands)
diff --git a/build/windows/installer/certificategenerator/Program.cs b/build/windows/installer/certificategenerator/Program.cs
index 43063c4be..e24d0e303 100644
--- a/build/windows/installer/certificategenerator/Program.cs
+++ b/build/windows/installer/certificategenerator/Program.cs
@@ -414,14 +414,12 @@ static void Main(string[] args)
 
             try
             {
-                if (!String.IsNullOrEmpty(Environment.GetEnvironmentVariable("WSKEY")))
-                {
-                    logAnalyticsWorkspaceSharedKey = Environment.GetEnvironmentVariable("WSKEY");
-                }
+              // WSKEY isn't stored as an environment variable
+              logAnalyticsWorkspaceSharedKey = File.ReadAllText("C:/etc/omsagent-secret/KEY").Trim();
             }
             catch (Exception ex)
             {
-                Console.WriteLine("Failed to read env variables (WSKEY)" + ex.Message);
+                Console.WriteLine("Failed to read secret (WSKEY)" + ex.Message);
             }
 
             try
diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf
index c96300b1e..d5eb475ca 100644
--- a/build/windows/installer/conf/fluent.conf
+++ b/build/windows/installer/conf/fluent.conf
@@ -6,7 +6,8 @@
 
 <source>
   @type tail
-  path /var/log/containers/*.log
+  path "#{ENV['AZMON_LOG_TAIL_PATH']}"
+  exclude_path "#{ENV['AZMON_CLUSTER_LOG_TAIL_EXCLUDE_PATH']}"
   pos_file /var/opt/microsoft/fluent/fluentd-containers.log.pos
   tag oms.container.log.la
   @log_level trace
@@ -28,6 +29,14 @@
   @include fluent-docker-parser.conf
 </source>
 
+<filter oms.container.log.la>
+  @type grep
+  <exclude>
+    key stream
+    pattern "#{ENV['AZMON_LOG_EXCLUSION_REGEX_PATTERN']}"
+  </exclude>
+</filter>
+
 <filter  oms.container.**>
   @type record_transformer
   # fluent-plugin-record-modifier more light-weight but needs to be installed (dependency worth it?)
@@ -37,7 +46,6 @@
   </record>
 </filter>
 
-
 <match oms.container.**>
   @type forward
   send_timeout 60s
diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1
index d32e5068a..a297e3801 100644
--- a/kubernetes/windows/main.ps1
+++ b/kubernetes/windows/main.ps1
@@ -64,19 +64,11 @@ function Set-EnvironmentVariables {
         $wsID = Get-Content /etc/omsagent-secret/WSID
     }
 
-    # Set DOMAIN
+    # Set WSID
     [System.Environment]::SetEnvironmentVariable("WSID", $wsID, "Process")
     [System.Environment]::SetEnvironmentVariable("WSID", $wsID, "Machine")
 
-    $wsKey = ""
-    if (Test-Path /etc/omsagent-secret/KEY) {
-        # TODO: Change to omsagent-secret before merging
-        $wsKey = Get-Content /etc/omsagent-secret/KEY
-    }
-
-    # Set KEY
-    [System.Environment]::SetEnvironmentVariable("WSKEY", $wsKey, "Process")
-    [System.Environment]::SetEnvironmentVariable("WSKEY", $wsKey, "Machine")
+    # Don't store WSKEY as environment variable
 
     $proxy = ""
     if (Test-Path /etc/omsagent-secret/PROXY) {
diff --git a/source/plugins/ruby/filter_cadvisor2mdm.rb b/source/plugins/ruby/filter_cadvisor2mdm.rb
index 2423ad024..8d7e729c8 100644
--- a/source/plugins/ruby/filter_cadvisor2mdm.rb
+++ b/source/plugins/ruby/filter_cadvisor2mdm.rb
@@ -308,8 +308,16 @@ def ensure_cpu_memory_capacity_set
         end
       elsif controller_type.downcase == "daemonset"
         capacity_from_kubelet = KubeletUtils.get_node_capacity
-        @cpu_capacity = capacity_from_kubelet[0]
-        @memory_capacity = capacity_from_kubelet[1]
+
+        # Error handling in case /metrics/cadvsior endpoint fails
+        if !capacity_from_kubelet.nil? && capacity_from_kubelet.length > 1
+          @cpu_capacity = capacity_from_kubelet[0]
+          @memory_capacity = capacity_from_kubelet[1]
+        else
+          # cpu_capacity and memory_capacity keep initialized value of 0.0
+          @log.error "Error getting capacity_from_kubelet: cpu_capacity and memory_capacity"
+        end
+
       end
     end
 

From 148d73974a003aba7f77f93389c59aede4679b49 Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Thu, 7 Jan 2021 18:38:06 -0800
Subject: [PATCH 55/60] updating fbit version and cpu limit (#485)

---
 kubernetes/linux/setup.sh | 2 +-
 kubernetes/omsagent.yaml  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh
index fb41d4782..88e9da4dd 100644
--- a/kubernetes/linux/setup.sh
+++ b/kubernetes/linux/setup.sh
@@ -71,7 +71,7 @@ chmod 777 /opt/telegraf
 wget -qO - https://packages.fluentbit.io/fluentbit.key | sudo apt-key add -
 sudo echo "deb https://packages.fluentbit.io/ubuntu/xenial xenial main" >> /etc/apt/sources.list
 sudo apt-get update
-sudo apt-get install td-agent-bit=1.4.2 -y
+sudo apt-get install td-agent-bit=1.6.9 -y
 
 rm -rf $TMPDIR/omsbundle
 rm -f $TMPDIR/omsagent*.sh
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 013e2a6c0..563955968 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -368,7 +368,7 @@ spec:
           imagePullPolicy: IfNotPresent
           resources:
             limits:
-              cpu: 250m
+              cpu: 500m
               memory: 600Mi
             requests:
               cpu: 75m

From bd33dd9f23cfc5c569e83d9389b2d0064757f5be Mon Sep 17 00:00:00 2001
From: rashmichandrashekar <rashmy@microsoft.com>
Date: Fri, 8 Jan 2021 13:47:25 -0800
Subject: [PATCH 56/60] reverting to older version (#487)

---
 kubernetes/linux/setup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh
index 88e9da4dd..352be06d7 100644
--- a/kubernetes/linux/setup.sh
+++ b/kubernetes/linux/setup.sh
@@ -71,7 +71,7 @@ chmod 777 /opt/telegraf
 wget -qO - https://packages.fluentbit.io/fluentbit.key | sudo apt-key add -
 sudo echo "deb https://packages.fluentbit.io/ubuntu/xenial xenial main" >> /etc/apt/sources.list
 sudo apt-get update
-sudo apt-get install td-agent-bit=1.6.9 -y
+sudo apt-get install td-agent-bit=1.6.8 -y
 
 rm -rf $TMPDIR/omsbundle
 rm -f $TMPDIR/omsagent*.sh

From d5164d235dd2512824f679ddbe30ebafdf8f1a14 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 11 Jan 2021 11:48:50 -0800
Subject: [PATCH 57/60] Gangams/add fbsettings configurable via configmap
 (#486)

* wip

* fbit config settings

* add config warn message

* handle one config provided but not other

* fixed pr feedback

* fix copy paste error

* rename config parameter names

* fix typo

* fix fbit crash in helm path

* fix nil check
---
 .../scripts/td-agent-bit-conf-customizer.rb   | 11 +++--
 .../scripts/tomlparser-agent-config.rb        | 48 +++++++++++++++++++
 kubernetes/linux/main.sh                      |  1 +
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/build/common/installer/scripts/td-agent-bit-conf-customizer.rb b/build/common/installer/scripts/td-agent-bit-conf-customizer.rb
index fae3acb36..35b71e550 100644
--- a/build/common/installer/scripts/td-agent-bit-conf-customizer.rb
+++ b/build/common/installer/scripts/td-agent-bit-conf-customizer.rb
@@ -18,12 +18,17 @@ def substituteFluentBitPlaceHolders
     bufferChunkSize = ENV["FBIT_TAIL_BUFFER_CHUNK_SIZE"]
     bufferMaxSize = ENV["FBIT_TAIL_BUFFER_MAX_SIZE"]
 
-    serviceInterval = (!interval.nil? && is_number?(interval)) ? interval : @default_service_interval
+    serviceInterval = (!interval.nil? && is_number?(interval) && interval.to_i > 0 ) ? interval : @default_service_interval
     serviceIntervalSetting = "Flush         " + serviceInterval
 
-    tailBufferChunkSize = (!bufferChunkSize.nil? && is_number?(bufferChunkSize)) ? bufferChunkSize : nil
+    tailBufferChunkSize = (!bufferChunkSize.nil? && is_number?(bufferChunkSize) && bufferChunkSize.to_i > 0) ? bufferChunkSize : nil
 
-    tailBufferMaxSize = (!bufferMaxSize.nil? && is_number?(bufferMaxSize)) ? bufferMaxSize : nil
+    tailBufferMaxSize = (!bufferMaxSize.nil? && is_number?(bufferMaxSize) && bufferMaxSize.to_i > 0) ? bufferMaxSize : nil
+
+    if ((!tailBufferChunkSize.nil? && tailBufferMaxSize.nil?) ||  (!tailBufferChunkSize.nil? && !tailBufferMaxSize.nil? && tailBufferChunkSize.to_i > tailBufferMaxSize.to_i))
+      puts "config:warn buffer max size must be greater or equal to chunk size"
+      tailBufferMaxSize = tailBufferChunkSize
+    end
 
     text = File.read(@td_agent_bit_conf_path)
     new_contents = text.gsub("${SERVICE_FLUSH_INTERVAL}", serviceIntervalSetting)
diff --git a/build/linux/installer/scripts/tomlparser-agent-config.rb b/build/linux/installer/scripts/tomlparser-agent-config.rb
index 87c5194ed..e587909e5 100644
--- a/build/linux/installer/scripts/tomlparser-agent-config.rb
+++ b/build/linux/installer/scripts/tomlparser-agent-config.rb
@@ -55,6 +55,12 @@
 @podsEmitStreamBatchSizeMin = 50
 @nodesEmitStreamBatchSizeMin = 50
 
+# configmap settings related fbit config
+@fbitFlushIntervalSecs = 0
+@fbitTailBufferChunkSizeMBs = 0
+@fbitTailBufferMaxSizeMBs = 0
+
+
 def is_number?(value)
   true if Integer(value) rescue false
 end
@@ -131,6 +137,38 @@ def populateSettingValuesFromConfigMap(parsedConfig)
           puts "Using config map value: NODES_EMIT_STREAM_BATCH_SIZE = #{@nodesEmitStreamBatchSize}"
         end
       end
+      # fbit config settings
+      fbit_config = parsedConfig[:agent_settings][:fbit_config]
+      if !fbit_config.nil?
+        fbitFlushIntervalSecs = fbit_config[:log_flush_interval_secs]
+        if !fbitFlushIntervalSecs.nil? && is_number?(fbitFlushIntervalSecs) && fbitFlushIntervalSecs.to_i > 0
+          @fbitFlushIntervalSecs = fbitFlushIntervalSecs.to_i
+          puts "Using config map value: log_flush_interval_secs = #{@fbitFlushIntervalSecs}"
+        end
+
+        fbitTailBufferChunkSizeMBs = fbit_config[:tail_buf_chunksize_megabytes]
+        if !fbitTailBufferChunkSizeMBs.nil? && is_number?(fbitTailBufferChunkSizeMBs) && fbitTailBufferChunkSizeMBs.to_i > 0
+          @fbitTailBufferChunkSizeMBs = fbitTailBufferChunkSizeMBs.to_i
+          puts "Using config map value: tail_buf_chunksize_megabytes  = #{@fbitTailBufferChunkSizeMBs}"
+        end
+
+        fbitTailBufferMaxSizeMBs = fbit_config[:tail_buf_maxsize_megabytes]
+        if !fbitTailBufferMaxSizeMBs.nil? && is_number?(fbitTailBufferMaxSizeMBs) && fbitTailBufferMaxSizeMBs.to_i > 0           
+          if fbitTailBufferMaxSizeMBs.to_i >= @fbitTailBufferChunkSizeMBs
+            @fbitTailBufferMaxSizeMBs = fbitTailBufferMaxSizeMBs.to_i
+            puts "Using config map value: tail_buf_maxsize_megabytes = #{@fbitTailBufferMaxSizeMBs}"
+          else
+            # tail_buf_maxsize_megabytes has to be greater or equal to tail_buf_chunksize_megabytes
+            @fbitTailBufferMaxSizeMBs = @fbitTailBufferChunkSizeMBs
+            puts "config::warn: tail_buf_maxsize_megabytes must be greater or equal to value of tail_buf_chunksize_megabytes. Using tail_buf_maxsize_megabytes = #{@fbitTailBufferMaxSizeMBs} since provided config value not valid"
+          end
+        end
+        # in scenario - tail_buf_chunksize_megabytes provided but not tail_buf_maxsize_megabytes to prevent fbit crash
+        if  @fbitTailBufferChunkSizeMBs > 0  && @fbitTailBufferMaxSizeMBs == 0
+          @fbitTailBufferMaxSizeMBs = @fbitTailBufferChunkSizeMBs
+          puts "config::warn: since tail_buf_maxsize_megabytes not provided hence using tail_buf_maxsize_megabytes=#{@fbitTailBufferMaxSizeMBs} which is same as the value of tail_buf_chunksize_megabytes"
+        end 
+      end
     end
   rescue => errorStr
     puts "config::error:Exception while reading config settings for agent configuration setting - #{errorStr}, using defaults"
@@ -164,6 +202,16 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   file.write("export HPA_CHUNK_SIZE=#{@hpaChunkSize}\n")
   file.write("export PODS_EMIT_STREAM_BATCH_SIZE=#{@podsEmitStreamBatchSize}\n")
   file.write("export NODES_EMIT_STREAM_BATCH_SIZE=#{@nodesEmitStreamBatchSize}\n")
+  # fbit settings
+  if @fbitFlushIntervalSecs > 0
+    file.write("export FBIT_SERVICE_FLUSH_INTERVAL=#{@fbitFlushIntervalSecs}\n")
+  end
+  if @fbitTailBufferChunkSizeMBs > 0
+    file.write("export FBIT_TAIL_BUFFER_CHUNK_SIZE=#{@fbitTailBufferChunkSizeMBs}\n")
+  end
+  if @fbitTailBufferMaxSizeMBs > 0
+    file.write("export FBIT_TAIL_BUFFER_MAX_SIZE=#{@fbitTailBufferMaxSizeMBs}\n")
+  end 
   # Close file after writing all environment variables
   file.close
 else
diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh
index ed16d3e32..b4df538d4 100644
--- a/kubernetes/linux/main.sh
+++ b/kubernetes/linux/main.sh
@@ -172,6 +172,7 @@ source config_env_var
 
 
 #Parse the configmap to set the right environment variables for agent config.
+#Note > tomlparser-agent-config.rb has to be parsed first before td-agent-bit-conf-customizer.rb for fbit agent settings
 /opt/microsoft/omsagent/ruby/bin/ruby tomlparser-agent-config.rb
 
 cat agent_config_env_var | while read line; do

From 908d9b0cdcd46452582338ca23f7bfbf85411e37 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 11 Jan 2021 12:47:38 -0800
Subject: [PATCH 58/60] Gangams/jan agent release tasks (#484)

* wip

* explicit amd64 affinity for hybrid workloads

* fix space issue

* wip

* revert vscode setting file
---
 .../templates/omsagent-daemonset-windows.yaml  |  4 ++++
 .../templates/omsagent-daemonset.yaml          |  4 ++++
 charts/azuremonitor-containers/values.yaml     | 18 +++++++++++++++++-
 kubernetes/omsagent.yaml                       |  8 ++++++++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
index 6a309c121..81003c704 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset-windows.yaml
@@ -27,6 +27,10 @@ spec:
     checksum/secret: {{ include (print $.Template.BasePath "/omsagent-secret.yaml") . | sha256sum }}
     checksum/config: {{ toYaml .Values.omsagent.resources | sha256sum }}
   spec:
+   dnsConfig:    
+     options:
+       - name: ndots
+         value: "3"     
 {{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }}
    nodeSelector:
       kubernetes.io/os: windows
diff --git a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
index d57c4d82b..3d29ede42 100644
--- a/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
+++ b/charts/azuremonitor-containers/templates/omsagent-daemonset.yaml
@@ -28,6 +28,10 @@ spec:
     checksum/config: {{ toYaml .Values.omsagent.resources | sha256sum }}
     checksum/logsettings: {{ toYaml .Values.omsagent.logsettings | sha256sum }}
   spec:
+   dnsConfig:    
+     options:
+       - name: ndots
+         value: "3"     
    {{- if .Values.omsagent.rbac }}
    serviceAccountName: omsagent
    {{- end }}
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index 907e315d1..b3d029146 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -58,7 +58,7 @@ omsagent:
         requiredDuringSchedulingIgnoredDuringExecution:
           nodeSelectorTerms:
             - labelSelector:
-              matchExpressions:
+              matchExpressions:           
                 - key: kubernetes.io/os
                   operator: In
                   values:
@@ -67,6 +67,10 @@ omsagent:
                   operator: NotIn
                   values:
                     - virtual-kubelet
+                - key: kubernetes.io/arch 
+                  operator: In
+                  values:
+                    - amd64   
           nodeSelectorTerms:
             - labelSelector:
               matchExpressions:
@@ -78,6 +82,10 @@ omsagent:
                   operator: NotIn
                   values:
                     - virtual-kubelet
+                - key: beta.kubernetes.io/arch 
+                  operator: In
+                  values:
+                    - amd64       
   deployment:
     affinity:
       nodeAffinity:
@@ -106,6 +114,10 @@ omsagent:
                   operator: NotIn
                   values:
                     - master
+                - key: kubernetes.io/arch 
+                  operator: In
+                  values:
+                    - amd64       
           nodeSelectorTerms:
             - labelSelector:
               matchExpressions:
@@ -121,6 +133,10 @@ omsagent:
                   operator: NotIn
                   values:
                     - master
+                - key: beta.kubernetes.io/arch 
+                  operator: In
+                  values:
+                    - amd64           
   ## Configure resource requests and limits
   ## ref: http://kubernetes.io/docs/user-guide/compute-resources/
   ##
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index 563955968..df80cabc4 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -362,6 +362,10 @@ spec:
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
+      dnsConfig:    
+        options:
+          - name: ndots
+            value: "3"       
       containers:
         - name: omsagent
           image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020"
@@ -675,6 +679,10 @@ spec:
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
+     dnsConfig:    
+        options:
+          - name: ndots
+            value: "3"     
      containers:
        - name: omsagent-win
          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod11092020"

From 8ede53653f79a7401352739f6d4f09e572b12235 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 11 Jan 2021 13:18:14 -0800
Subject: [PATCH 59/60] remove per container logs in ci (#488)

---
 .../plugins/ruby/kubernetes_container_inventory.rb   | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/source/plugins/ruby/kubernetes_container_inventory.rb b/source/plugins/ruby/kubernetes_container_inventory.rb
index ba6a9af42..69beca493 100644
--- a/source/plugins/ruby/kubernetes_container_inventory.rb
+++ b/source/plugins/ruby/kubernetes_container_inventory.rb
@@ -189,8 +189,7 @@ def getContainersInfoMap(podItem, isWindows)
       return containersInfoMap
     end
 
-    def obtainContainerEnvironmentVars(containerId)
-      $log.info("KubernetesContainerInventory::obtainContainerEnvironmentVars @ #{Time.now.utc.iso8601}")
+    def obtainContainerEnvironmentVars(containerId)    
       envValueString = ""
       begin
         isCGroupPidFetchRequired = false 
@@ -207,8 +206,7 @@ def obtainContainerEnvironmentVars(containerId)
           end        
         end
 
-        if isCGroupPidFetchRequired
-          $log.info("KubernetesContainerInventory::obtainContainerEnvironmentVars fetching cGroup parent pid @ #{Time.now.utc.iso8601} for containerId: #{containerId}")
+        if isCGroupPidFetchRequired         
           Dir["/hostfs/proc/*/cgroup"].each do |filename|
             begin
               if File.file?(filename) && File.exist?(filename) && File.foreach(filename).grep(/#{containerId}/).any?
@@ -231,8 +229,7 @@ def obtainContainerEnvironmentVars(containerId)
         end
         cGroupPid = @@containerCGroupCache[containerId]
         if !cGroupPid.nil? && !cGroupPid.empty?
-          environFilePath = "/hostfs/proc/#{cGroupPid}/environ"
-          $log.info("KubernetesContainerInventory::obtainContainerEnvironmentVars cGroupPid: #{cGroupPid} environFilePath: #{environFilePath} for containerId: #{containerId}")
+          environFilePath = "/hostfs/proc/#{cGroupPid}/environ"       
           if File.exist?(environFilePath)
             # Skip environment variable processing if it contains the flag AZMON_COLLECT_ENV=FALSE
             # Check to see if the environment variable collection is disabled for this container.
@@ -245,8 +242,7 @@ def obtainContainerEnvironmentVars(containerId)
               if !envVars.nil? && !envVars.empty?
                 envVars = envVars.split("\0")
                 envValueString = envVars.to_json
-                envValueStringLength = envValueString.length
-                $log.info("KubernetesContainerInventory::environment vars filename @ #{environFilePath} envVars size @ #{envValueStringLength}")
+                envValueStringLength = envValueString.length              
                 if envValueStringLength >= 200000
                   lastIndex = envValueString.rindex("\",")
                   if !lastIndex.nil?

From 70e570a5c73eeb864c7cb68c2c6cb936e9b45b85 Mon Sep 17 00:00:00 2001
From: Ganga Mahesh Siddem <gangams@microsoft.com>
Date: Mon, 11 Jan 2021 16:37:04 -0800
Subject: [PATCH 60/60] updates for ciprod01112021 release

---
 ReleaseNotes.md                                 | 17 +++++++++++++++++
 build/version                                   |  6 +++---
 charts/azuremonitor-containers/Chart.yaml       |  2 +-
 charts/azuremonitor-containers/values.yaml      |  6 +++---
 kubernetes/linux/Dockerfile                     |  2 +-
 kubernetes/omsagent.yaml                        | 12 ++++++------
 kubernetes/windows/Dockerfile                   |  2 +-
 .../onboarding/managed/enable-monitoring.ps1    |  2 +-
 scripts/onboarding/managed/enable-monitoring.sh |  2 +-
 .../onboarding/managed/upgrade-monitoring.sh    |  2 +-
 10 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index ddfd01314..b1eb316a1 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -10,6 +10,23 @@ additional questions or comments.
 ## Release History
 
 Note : The agent version(s) below has dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates)
+### 01/11/2021 -
+##### Version microsoft/oms:ciprod01112021 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01112021 (linux)
+##### Version microsoft/oms:win-ciprod01112021 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod01112021 (windows)
+##### Code change log
+- Fixes for Linux Agent Replicaset Pod OOMing issue
+- Update fluentbit (1.4.2 to 1.6.8) for the Linux Daemonset
+- Make Fluentbit settings: log_flush_interval_secs, tail_buf_chunksize_megabytes and tail_buf_maxsize_megabytes configurable via configmap
+- Support for PV inventory collection
+- Removal of Custom metric region check for Public cloud regions and update to use cloud environment variable to determine the custom metric support
+- For daemonset pods, add dnsConfig to use ndots:3 instead of ndots:5 to reduce the number of DNS API calls made
+- Fix for inconsistency in the collection of container environment variables for pods which have a high number of containers
+- Fix for the issue with disabling std{out;err} log_collection_settings via configmap in the windows daemonset
+- Update to use workspace key from mount file rather than environment variable for windows daemonset agent
+- Remove per container info logs in the container inventory
+- Enable ADX route for windows container logs
+- Remove logging to termination log in windows agent liveness probe
+
 
 ### 11/09/2020 -
 ##### Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020 (linux)
diff --git a/build/version b/build/version
index a8b78ecac..711a96921 100644
--- a/build/version
+++ b/build/version
@@ -2,11 +2,11 @@
 
 # Build Version Information
 
-CONTAINER_BUILDVERSION_MAJOR=11
+CONTAINER_BUILDVERSION_MAJOR=12
 CONTAINER_BUILDVERSION_MINOR=0
 CONTAINER_BUILDVERSION_PATCH=0
-CONTAINER_BUILDVERSION_BUILDNR=1
-CONTAINER_BUILDVERSION_DATE=20201109
+CONTAINER_BUILDVERSION_BUILDNR=0
+CONTAINER_BUILDVERSION_DATE=20210111
 CONTAINER_BUILDVERSION_STATUS=Developer_Build
 
 #-------------------------------- End of File -----------------------------------
diff --git a/charts/azuremonitor-containers/Chart.yaml b/charts/azuremonitor-containers/Chart.yaml
index 987841f77..a809a4e69 100644
--- a/charts/azuremonitor-containers/Chart.yaml
+++ b/charts/azuremonitor-containers/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v1
 appVersion: 7.0.0-1
 description: Helm chart for deploying Azure Monitor container monitoring agent in Kubernetes
 name: azuremonitor-containers
-version: 2.7.9
+version: 2.8.0
 kubeVersion: "^1.10.0-0"
 keywords:
   - monitoring
diff --git a/charts/azuremonitor-containers/values.yaml b/charts/azuremonitor-containers/values.yaml
index b3d029146..debd66b0b 100644
--- a/charts/azuremonitor-containers/values.yaml
+++ b/charts/azuremonitor-containers/values.yaml
@@ -12,10 +12,10 @@ Azure:
 omsagent:
   image:
     repo: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
-    tag: "ciprod11092020"
-    tagWindows: "win-ciprod11092020"
+    tag: "ciprod01112021"
+    tagWindows: "win-ciprod01112021"
     pullPolicy: IfNotPresent
-    dockerProviderVersion: "11.0.0-1"
+    dockerProviderVersion: "12.0.0-0"
     agentVersion: "1.10.0.1"
   ## To get your workspace id and key do the following
   ## You can create a Azure Loganalytics workspace from portal.azure.com and get its ID & PRIMARY KEY from 'Advanced Settings' tab in the Ux.
diff --git a/kubernetes/linux/Dockerfile b/kubernetes/linux/Dockerfile
index 34ab133da..2e1118922 100644
--- a/kubernetes/linux/Dockerfile
+++ b/kubernetes/linux/Dockerfile
@@ -2,7 +2,7 @@ FROM ubuntu:18.04
 MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
-ARG IMAGE_TAG=ciprod11092020
+ARG IMAGE_TAG=ciprod01112021
 ENV AGENT_VERSION ${IMAGE_TAG}
 ENV tmpdir /opt
 ENV APPLICATIONINSIGHTS_AUTH NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index df80cabc4..67bd9cdde 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -358,7 +358,7 @@ spec:
         tier: node
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "11.0.0-1"
+        dockerProviderVersion: "12.0.0-0"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
@@ -368,7 +368,7 @@ spec:
             value: "3"       
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01112021"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -521,13 +521,13 @@ spec:
         rsName: "omsagent-rs"
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "11.0.0-1"
+        dockerProviderVersion: "12.0.0-0"
         schema-versions: "v1"
     spec:
       serviceAccountName: omsagent
       containers:
         - name: omsagent
-          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod11092020"
+          image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01112021"
           imagePullPolicy: IfNotPresent
           resources:
             limits:
@@ -675,7 +675,7 @@ spec:
         tier: node-win
       annotations:
         agentVersion: "1.10.0.1"
-        dockerProviderVersion: "11.0.0-1"
+        dockerProviderVersion: "12.0.0-0"
         schema-versions: "v1"
     spec:
      serviceAccountName: omsagent
@@ -685,7 +685,7 @@ spec:
             value: "3"     
      containers:
        - name: omsagent-win
-         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod11092020"
+         image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod01112021"
          imagePullPolicy: IfNotPresent
          resources:
           limits:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index 10ea235b2..f852bd236 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER OMSContainers@microsoft.com
 LABEL vendor=Microsoft\ Corp \
     com.microsoft.product="Azure Monitor for containers"
 
-ARG IMAGE_TAG=win-ciprod11092020
+ARG IMAGE_TAG=win-ciprod01112021
 
 # Do not split this into multiple RUN!
 # Docker creates a layer for every RUN-Statement
diff --git a/scripts/onboarding/managed/enable-monitoring.ps1 b/scripts/onboarding/managed/enable-monitoring.ps1
index 7b128b112..45ddb44b0 100644
--- a/scripts/onboarding/managed/enable-monitoring.ps1
+++ b/scripts/onboarding/managed/enable-monitoring.ps1
@@ -64,7 +64,7 @@ $isUsingServicePrincipal = $false
 
 # released chart version in mcr
 $mcr = "mcr.microsoft.com"
-$mcrChartVersion = "2.7.9"
+$mcrChartVersion = "2.8.0"
 $mcrChartRepoPath = "azuremonitor/containerinsights/preview/azuremonitor-containers"
 $helmLocalRepoName = "."
 $omsAgentDomainName="opinsights.azure.com"
diff --git a/scripts/onboarding/managed/enable-monitoring.sh b/scripts/onboarding/managed/enable-monitoring.sh
index 85428aff7..2dc0a465f 100644
--- a/scripts/onboarding/managed/enable-monitoring.sh
+++ b/scripts/onboarding/managed/enable-monitoring.sh
@@ -44,7 +44,7 @@ defaultAzureCloud="AzureCloud"
 omsAgentDomainName="opinsights.azure.com"
 
 # released chart version in mcr
-mcrChartVersion="2.7.9"
+mcrChartVersion="2.8.0"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"
 helmLocalRepoName="."
diff --git a/scripts/onboarding/managed/upgrade-monitoring.sh b/scripts/onboarding/managed/upgrade-monitoring.sh
index 847bf84ea..8826b6df6 100644
--- a/scripts/onboarding/managed/upgrade-monitoring.sh
+++ b/scripts/onboarding/managed/upgrade-monitoring.sh
@@ -20,7 +20,7 @@ set -e
 set -o pipefail
 
 # released chart version for Azure Arc enabled Kubernetes public preview
-mcrChartVersion="2.7.9"
+mcrChartVersion="2.8.0"
 mcr="mcr.microsoft.com"
 mcrChartRepoPath="azuremonitor/containerinsights/preview/azuremonitor-containers"