From 565512e7447fae9d35d3734fa6f76b2bc3146f5b Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 4 Jul 2022 16:01:38 -0700 Subject: [PATCH 01/20] add vpa sidecar container --- kubernetes/omsagent.yaml | 52 ++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 88d2fdda8..b67cb58a7 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -617,6 +617,39 @@ spec: spec: serviceAccountName: omsagent containers: + - image: "mcr.microsoft.com/oss/kubernetes/autoscaler/addon-resizer:1.8.14" + imagePullPolicy: IfNotPresent + name: omsagent-rs-vpa + resources: + limits: + cpu: 100m + memory: 300Mi + requests: + cpu: 5m + memory: 30Mi + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: omsagent-rs-vpa-config-volume + mountPath: /etc/config + command: + - /pod_nanny + - --config-dir=/etc/config + - --cpu=200m + - --extra-cpu=2m + - --memory=250Mi + - --extra-memory=4Mi + - --poll-period=180000 + - --threshold=5 + - --deployment=omsagent-rs + - --container=omsagent # Uncomment below lines for MSI Auth Mode testing # - name: addon-token-adapter # command: @@ -655,13 +688,14 @@ spec: - name: omsagent image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06272022" imagePullPolicy: IfNotPresent - resources: - limits: - cpu: 1 - memory: 1Gi - requests: - cpu: 150m - memory: 250Mi + # uncomment only if VPA not configured + # resources: + # limits: + # cpu: 1 + # memory: 1Gi + # requests: + # cpu: 150m + # memory: 250Mi env: - name: NUM_OF_FLUENTD_WORKERS valueFrom: @@ -798,6 +832,10 @@ spec: configMap: name: container-azm-ms-osmconfig optional: true + - name: omsagent-rs-vpa-config-volume + configMap: + name: omsagent-rs-vpa-config + optional: true --- apiVersion: apps/v1 kind: DaemonSet From 8d667518a65092267470da367cc4b691d4be8edc Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 4 Jul 2022 19:53:45 -0700 Subject: [PATCH 02/20] add vpa sidecar container --- kubernetes/omsagent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index b67cb58a7..fbde38ce8 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -624,7 +624,7 @@ spec: limits: cpu: 100m memory: 300Mi - requests: + requests: cpu: 5m memory: 30Mi env: From c8e08ac68d558a87ff9f3939e9f24d7e4292d378 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 4 Jul 2022 20:15:49 -0700 Subject: [PATCH 03/20] add vpa sidecar container --- kubernetes/omsagent.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index fbde38ce8..3e55a5dda 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -27,6 +27,11 @@ rules: - apiGroups: ["apps", "extensions", "autoscaling"] resources: ["replicasets", "deployments", "horizontalpodautoscalers"] verbs: ["list"] + # Comment below lines if VPA not enabled + - apiGroups: ["apps"] + resources: ["deployments"] + resourceNames: [ "omsagent-rs" ] + verbs: ["get", "patch"] # Uncomment below lines for MSI Auth Mode testing # - apiGroups: [""] # resources: ["secrets"] From e4aae5fcb820e4fd403221be33228d6a0e0fe90d Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 4 Jul 2022 22:18:04 -0700 Subject: [PATCH 04/20] add vpa sidecar container --- kubernetes/omsagent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 3e55a5dda..cfdedcba5 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -649,7 +649,7 @@ spec: - --config-dir=/etc/config - --cpu=200m - --extra-cpu=2m - - --memory=250Mi + - --memory=300Mi - --extra-memory=4Mi - --poll-period=180000 - --threshold=5 From 62e10023ed32386444975d6d73b6f8073501ba38 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 7 Jul 2022 14:55:09 -0700 Subject: [PATCH 05/20] use image which has support for only scaling limits --- kubernetes/omsagent.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index cfdedcba5..da578903d 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -622,7 +622,7 @@ spec: spec: serviceAccountName: omsagent containers: - - image: "mcr.microsoft.com/oss/kubernetes/autoscaler/addon-resizer:1.8.14" + - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.15" imagePullPolicy: IfNotPresent name: omsagent-rs-vpa resources: @@ -655,6 +655,7 @@ spec: - --threshold=5 - --deployment=omsagent-rs - --container=omsagent + - --ignore-resource-requests=true # Uncomment below lines for MSI Auth Mode testing # - name: addon-token-adapter # command: From f09c2f3e76afbfff17351bd42013f67c3f8865d8 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sat, 9 Jul 2022 19:43:11 -0700 Subject: [PATCH 06/20] rename omsagent-rs-vpa to omsagent-vpa --- kubernetes/omsagent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index da578903d..b75e164d9 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -624,7 +624,7 @@ spec: containers: - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.15" imagePullPolicy: IfNotPresent - name: omsagent-rs-vpa + name: omsagent-vpa resources: limits: cpu: 100m From d95a1d7e85c57afabd4b3c91c3df52a53c788cb6 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 10 Jul 2022 22:59:02 -0700 Subject: [PATCH 07/20] add vpa configmap --- kubernetes/container-azm-ms-vpaconfig.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 kubernetes/container-azm-ms-vpaconfig.yaml diff --git a/kubernetes/container-azm-ms-vpaconfig.yaml b/kubernetes/container-azm-ms-vpaconfig.yaml new file mode 100644 index 000000000..9734a59f7 --- /dev/null +++ b/kubernetes/container-azm-ms-vpaconfig.yaml @@ -0,0 +1,13 @@ +kind: ConfigMap +apiVersion: v1 +data: + NannyConfiguration: |- + apiVersion: nannyconfig/v1alpha1 + kind: NannyConfiguration + baseCPU: 200m + cpuPerNode: 2m + baseMemory: 350Mi + memoryPerNode: 4Mi +metadata: + name: container-azm-ms-vpaconfig + namespace: kube-system From 526925af842bb5d15705ffd5d2f63afa81988d39 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Sun, 10 Jul 2022 23:11:29 -0700 Subject: [PATCH 08/20] use updated version of addon-resizer --- kubernetes/omsagent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index b75e164d9..33aefc038 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -622,7 +622,7 @@ spec: spec: serviceAccountName: omsagent containers: - - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.15" + - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.16" imagePullPolicy: IfNotPresent name: omsagent-vpa resources: From c45acffa16c594760e25c1bd6bd373718a566b15 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 12 Jul 2022 13:47:59 -0700 Subject: [PATCH 09/20] collect omsagent-rs limits telemetry if VPA enabled --- kubernetes/omsagent.yaml | 3 ++ .../ruby/ApplicationInsightsUtility.rb | 5 ++++ source/plugins/ruby/KubernetesApiClient.rb | 29 ++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 33aefc038..f9253f4f3 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -735,6 +735,9 @@ spec: # Uncomment below lines for MSI Auth Mode testing # - name: USING_AAD_MSI_AUTH # value: "true" + # Uncomment below lines when the Addon-resizer VPA enabled + # - name: ADDON-RESIZER_VPA_ENABLED + # value: "true" securityContext: privileged: true ports: diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb index 70d0a400e..7f3ddc7d2 100644 --- a/source/plugins/ruby/ApplicationInsightsUtility.rb +++ b/source/plugins/ruby/ApplicationInsightsUtility.rb @@ -22,6 +22,7 @@ class ApplicationInsightsUtility @@EnvControllerType = "CONTROLLER_TYPE" @@EnvContainerRuntime = "CONTAINER_RUNTIME" @@EnvAADMSIAuthMode = "AAD_MSI_AUTH_MODE" + @@EnvAddonResizerVPAEnabled = "ADDON-RESIZER_VPA_ENABLED" @@isWindows = false @@hostName = (OMS::Common.get_hostname) @@ -93,6 +94,10 @@ def initializeUtility() else @@CustomProperties["aadAuthMSIMode"] = "false" end + addonResizerVPAEnabled = ENV["RS_ADDON-RESIZER_VPA_ENABLED"] + if !addonResizerVPAEnabled.nil? && !addonResizerVPAEnabled.empty? && addonResizerVPAEnabled.downcase == "true".downcase + @@CustomProperties["addonResizerVPAEnabled"] = "true" + end #Check if telemetry is turned off telemetryOffSwitch = ENV["DISABLE_TELEMETRY"] if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index ffd76bfbd..cb08c55cb 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -470,6 +470,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle if podUid.nil? return metricItems end + podName = pod["metadata"]["name"] nodeName = "" #for unscheduled (non-started) pods nodeName does NOT exist @@ -514,7 +515,25 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricCollections.push(metricCollection) metricProps["json_Collections"] = metricCollections.to_json metricItems.push(metricProps) - #No container level limit for the given metric, so default to node level limit + # if the addonResizer + if isAddonResizerVPAEnabled() + if (podName.downcase.start_with?("omsagent-rs-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricCategory.eql?("limits")) + timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@telemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end + end + end + rescue => errorStr + $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") + end + end + end else if (metricCategory == "limits" && !nodeAllocatableRecord.nil? && !nodeAllocatableRecord.empty? && nodeAllocatableRecord.has_key?(metricNameToCollect)) metricValue = nodeAllocatableRecord[metricNameToCollect] @@ -1394,5 +1413,13 @@ def isEmitCacheTelemetry end return isEmitCacheTelemtryEnabled end + + def isAddonResizerVPAEnabled + isAddonResizerVPAEnabled = false + if !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].nil? && !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].empty? && ENV["RS_ADDON-RESIZER_VPA_ENABLED"].downcase == "true".downcase + isAddonResizerVPAEnabled= true + end + return isAddonResizerVPAEnabled + end end end From afa5f45f20d98df993180dfc7e004766694d1226 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 12 Jul 2022 18:53:31 -0700 Subject: [PATCH 10/20] ignore new unfixed vulnerabilities --- .trivyignore | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.trivyignore b/.trivyignore index 56ac504d5..9a1d475bd 100644 --- a/.trivyignore +++ b/.trivyignore @@ -16,4 +16,15 @@ CVE-2021-31799 CVE-2021-28965 #dpkg vulnerability in ubuntu -CVE-2022-1304 \ No newline at end of file +CVE-2022-1304 + +#containerd vulnerability - https://avd.aquasec.com/nvd/cve-2022-31030 +CVE-2022-31030 + +#Poin DTLS v2 vulnerabilities in ubuntu +#https://avd.aquasec.com/nvd/cve-2022-29190 +#https://avd.aquasec.com/nvd/cve-2022-29222 +#https://avd.aquasec.com/nvd/cve-2022-29189 +CVE-2022-29190 +CVE-2022-29222 +CVE-2022-29189 \ No newline at end of file From 3a20c9af6d1769ed1b38ab0d9c4dbf71573c7f02 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 12 Jul 2022 22:28:46 -0700 Subject: [PATCH 11/20] fix bug --- kubernetes/omsagent.yaml | 96 ++++++++++--------- .../ruby/ApplicationInsightsUtility.rb | 2 +- source/plugins/ruby/KubernetesApiClient.rb | 12 +-- 3 files changed, 54 insertions(+), 56 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index f9253f4f3..73ed8cb29 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -27,11 +27,11 @@ rules: - apiGroups: ["apps", "extensions", "autoscaling"] resources: ["replicasets", "deployments", "horizontalpodautoscalers"] verbs: ["list"] - # Comment below lines if VPA not enabled - - apiGroups: ["apps"] - resources: ["deployments"] - resourceNames: [ "omsagent-rs" ] - verbs: ["get", "patch"] + # Uncomment below lines if AddonResizer VPA enabled + # - apiGroups: ["apps"] + # resources: ["deployments"] + # resourceNames: [ "omsagent-rs" ] + # verbs: ["get", "patch"] # Uncomment below lines for MSI Auth Mode testing # - apiGroups: [""] # resources: ["secrets"] @@ -622,40 +622,42 @@ spec: spec: serviceAccountName: omsagent containers: - - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.16" - imagePullPolicy: IfNotPresent - name: omsagent-vpa - resources: - limits: - cpu: 100m - memory: 300Mi - requests: - cpu: 5m - memory: 30Mi - env: - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - volumeMounts: - - name: omsagent-rs-vpa-config-volume - mountPath: /etc/config - command: - - /pod_nanny - - --config-dir=/etc/config - - --cpu=200m - - --extra-cpu=2m - - --memory=300Mi - - --extra-memory=4Mi - - --poll-period=180000 - - --threshold=5 - - --deployment=omsagent-rs - - --container=omsagent - - --ignore-resource-requests=true + # Uncomment below lines to enable VPA + # - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.16" + # imagePullPolicy: IfNotPresent + # name: omsagent-vpa + # resources: + # limits: + # cpu: 100m + # memory: 300Mi + # requests: + # cpu: 5m + # memory: 30Mi + # env: + # - name: MY_POD_NAME + # valueFrom: + # fieldRef: + # fieldPath: metadata.name + # - name: MY_POD_NAMESPACE + # valueFrom: + # fieldRef: + # fieldPath: metadata.namespace + # volumeMounts: + # - name: omsagent-rs-vpa-config-volume + # mountPath: /etc/config + # command: + # - /pod_nanny + # - --config-dir=/etc/config + # - --cpu=200m + # - --extra-cpu=2m + # - --memory=300Mi + # - --extra-memory=4Mi + # - --poll-period=180000 + # - --threshold=5 + # - --namespace=kube-system + # - --deployment=omsagent-rs + # - --container=omsagent + # - --ignore-resource-requests=true # Uncomment below lines for MSI Auth Mode testing # - name: addon-token-adapter # command: @@ -694,14 +696,14 @@ spec: - name: omsagent image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06272022" imagePullPolicy: IfNotPresent - # uncomment only if VPA not configured - # resources: - # limits: - # cpu: 1 - # memory: 1Gi - # requests: - # cpu: 150m - # memory: 250Mi + resources: + # comment only limits if VPA configured + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 150m + memory: 250Mi env: - name: NUM_OF_FLUENTD_WORKERS valueFrom: diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb index 7f3ddc7d2..5b00f800c 100644 --- a/source/plugins/ruby/ApplicationInsightsUtility.rb +++ b/source/plugins/ruby/ApplicationInsightsUtility.rb @@ -94,7 +94,7 @@ def initializeUtility() else @@CustomProperties["aadAuthMSIMode"] = "false" end - addonResizerVPAEnabled = ENV["RS_ADDON-RESIZER_VPA_ENABLED"] + addonResizerVPAEnabled = ENV[@@EnvAddonResizerVPAEnabled] if !addonResizerVPAEnabled.nil? && !addonResizerVPAEnabled.empty? && addonResizerVPAEnabled.downcase == "true".downcase @@CustomProperties["addonResizerVPAEnabled"] = "true" end diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index cb08c55cb..2c23f56c6 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -515,9 +515,9 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricCollections.push(metricCollection) metricProps["json_Collections"] = metricCollections.to_json metricItems.push(metricProps) - # if the addonResizer + if isAddonResizerVPAEnabled() - if (podName.downcase.start_with?("omsagent-rs-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricCategory.eql?("limits")) + if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricCategory.eql?("limits")) timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) @@ -529,12 +529,8 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle end end end - rescue => errorStr - $log.warn("Exception while generating Telemetry from getcontainerCpuMetricItems failed: #{errorStr} for metric #{cpuMetricNameToCollect}") - end - end - end else + #No container level limit for the given metric, so default to node level limit if (metricCategory == "limits" && !nodeAllocatableRecord.nil? && !nodeAllocatableRecord.empty? && nodeAllocatableRecord.has_key?(metricNameToCollect)) metricValue = nodeAllocatableRecord[metricNameToCollect] metricProps = {} @@ -1417,7 +1413,7 @@ def isEmitCacheTelemetry def isAddonResizerVPAEnabled isAddonResizerVPAEnabled = false if !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].nil? && !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].empty? && ENV["RS_ADDON-RESIZER_VPA_ENABLED"].downcase == "true".downcase - isAddonResizerVPAEnabled= true + isAddonResizerVPAEnabled = true end return isAddonResizerVPAEnabled end From d1af3ea8a2ce36a84340ee6bcd6eca3543ef3be7 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 12 Jul 2022 22:31:37 -0700 Subject: [PATCH 12/20] fix bug --- source/plugins/ruby/KubernetesApiClient.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 2c23f56c6..431ebe221 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -1412,7 +1412,7 @@ def isEmitCacheTelemetry def isAddonResizerVPAEnabled isAddonResizerVPAEnabled = false - if !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].nil? && !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].empty? && ENV["RS_ADDON-RESIZER_VPA_ENABLED"].downcase == "true".downcase + if !ENV["ADDON-RESIZER_VPA_ENABLED"].nil? && !ENV["ADDON-RESIZER_VPA_ENABLED"].empty? && ENV["ADDON-RESIZER_VPA_ENABLED"].downcase == "true".downcase isAddonResizerVPAEnabled = true end return isAddonResizerVPAEnabled From 25dc84f3e7778e7ea8158f7833c3c3a7118e3122 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Tue, 12 Jul 2022 23:48:24 -0700 Subject: [PATCH 13/20] fix bug --- source/plugins/ruby/KubernetesApiClient.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 431ebe221..ed89c76e8 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -517,7 +517,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricItems.push(metricProps) if isAddonResizerVPAEnabled() - if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNamespace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricCategory.eql?("limits")) + if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNameSpace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricCategory.eql?("limits")) timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) From 29330dfc76469e3ba1f9d45ea7ed51e184ac8731 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 13 Jul 2022 09:32:42 -0700 Subject: [PATCH 14/20] bug fix --- source/plugins/ruby/KubernetesApiClient.rb | 33 +++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index ed89c76e8..3aaf949dd 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -37,7 +37,8 @@ class KubernetesApiClient @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" @@TokenStr = nil - @@telemetryTimeTracker = DateTime.now.to_time.to_i + @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + @@memoryLimitstelemetryTimeTracker = DateTime.now.to_time.to_i @@resourceLimitsTelemetryHash = {} def initialize @@ -517,15 +518,27 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricItems.push(metricProps) if isAddonResizerVPAEnabled() - if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNameSpace.eql?("kube-system") && containerName.downcase.start_with?("omsagent") && metricCategory.eql?("limits")) - timeDifference = (DateTime.now.to_time.to_i - @@telemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) - @@telemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProps = {} - telemetryProps["PodName"] = podName - telemetryProps["ContainerName"] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNameSpace.eql?("kube-system") && containerName.eql?("omsagent") && metricCategory.eql?("limits")) + if metricNameToReturn == "cpuLimitNanoCores" + timeDifference = (DateTime.now.to_time.to_i - @@cpuLimitsTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end + elsif metricNameToReturn == "memoryLimitBytes" + timeDifference = (DateTime.now.to_time.to_i - @@memoryLimitsTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) + end end end end From 1adcac0e62294e94c61a3105eb34a06a9d72ffc0 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 13 Jul 2022 12:42:11 -0700 Subject: [PATCH 15/20] fix bug --- source/plugins/ruby/KubernetesApiClient.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 3aaf949dd..a2544a732 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -519,7 +519,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle if isAddonResizerVPAEnabled() if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNameSpace.eql?("kube-system") && containerName.eql?("omsagent") && metricCategory.eql?("limits")) - if metricNameToReturn == "cpuLimitNanoCores" + if metricNametoReturn == "cpuLimitNanoCores" timeDifference = (DateTime.now.to_time.to_i - @@cpuLimitsTelemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) @@ -529,7 +529,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle telemetryProps["ContainerName"] = containerName ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end - elsif metricNameToReturn == "memoryLimitBytes" + elsif metricNametoReturn == "memoryLimitBytes" timeDifference = (DateTime.now.to_time.to_i - @@memoryLimitsTelemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) From 3d8be720384772dc6540816b4228eb95c1dc5807 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 13 Jul 2022 12:57:51 -0700 Subject: [PATCH 16/20] fix bug --- source/plugins/ruby/KubernetesApiClient.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index a2544a732..0d5793ef3 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -38,7 +38,7 @@ class KubernetesApiClient @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" @@TokenStr = nil @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i - @@memoryLimitstelemetryTimeTracker = DateTime.now.to_time.to_i + @@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i @@resourceLimitsTelemetryHash = {} def initialize From e06f44337ed5c07bb376b0235d84e138856cdf83 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Wed, 13 Jul 2022 14:24:18 -0700 Subject: [PATCH 17/20] rename env var name --- kubernetes/omsagent.yaml | 2 +- source/plugins/ruby/ApplicationInsightsUtility.rb | 2 +- source/plugins/ruby/KubernetesApiClient.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 73ed8cb29..8f0871be6 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -738,7 +738,7 @@ spec: # - name: USING_AAD_MSI_AUTH # value: "true" # Uncomment below lines when the Addon-resizer VPA enabled - # - name: ADDON-RESIZER_VPA_ENABLED + # - name: RS_ADDON-RESIZER_VPA_ENABLED # value: "true" securityContext: privileged: true diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb index 5b00f800c..4e38ebce2 100644 --- a/source/plugins/ruby/ApplicationInsightsUtility.rb +++ b/source/plugins/ruby/ApplicationInsightsUtility.rb @@ -22,7 +22,7 @@ class ApplicationInsightsUtility @@EnvControllerType = "CONTROLLER_TYPE" @@EnvContainerRuntime = "CONTAINER_RUNTIME" @@EnvAADMSIAuthMode = "AAD_MSI_AUTH_MODE" - @@EnvAddonResizerVPAEnabled = "ADDON-RESIZER_VPA_ENABLED" + @@EnvAddonResizerVPAEnabled = "RS_ADDON-RESIZER_VPA_ENABLED" @@isWindows = false @@hostName = (OMS::Common.get_hostname) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 0d5793ef3..ea4decaf7 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -1425,7 +1425,7 @@ def isEmitCacheTelemetry def isAddonResizerVPAEnabled isAddonResizerVPAEnabled = false - if !ENV["ADDON-RESIZER_VPA_ENABLED"].nil? && !ENV["ADDON-RESIZER_VPA_ENABLED"].empty? && ENV["ADDON-RESIZER_VPA_ENABLED"].downcase == "true".downcase + if !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].nil? && !ENV["RS_ADDON-RESIZER_VPA_ENABLED"].empty? && ENV["RS_ADDON-RESIZER_VPA_ENABLED"].downcase == "true".downcase isAddonResizerVPAEnabled = true end return isAddonResizerVPAEnabled From 84e8440aa34ffbbd92f16fb21b74c2e506afb437 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 14 Jul 2022 13:58:11 -0700 Subject: [PATCH 18/20] use the addon-resizer and collect requests and limits telemetry --- kubernetes/omsagent.yaml | 8 +-- source/plugins/ruby/KubernetesApiClient.rb | 68 ++++++++++++++-------- 2 files changed, 49 insertions(+), 27 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 8f0871be6..2fa8dad0d 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -622,8 +622,9 @@ spec: spec: serviceAccountName: omsagent containers: - # Uncomment below lines to enable VPA - # - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:addon-resizer-1.8.16" + # Uncomment below lines to enable VPA + # # Make sure this matching with version in AKS RP side + # - image: "mcr.microsoft.com/oss/kubernetes/autoscaler/addon-resizer:1.8.14" # imagePullPolicy: IfNotPresent # name: omsagent-vpa # resources: @@ -657,7 +658,6 @@ spec: # - --namespace=kube-system # - --deployment=omsagent-rs # - --container=omsagent - # - --ignore-resource-requests=true # Uncomment below lines for MSI Auth Mode testing # - name: addon-token-adapter # command: @@ -696,8 +696,8 @@ spec: - name: omsagent image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06272022" imagePullPolicy: IfNotPresent + # comment resources if VPA configured since the VPA will set these values resources: - # comment only limits if VPA configured limits: cpu: 1 memory: 1Gi diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index ea4decaf7..501a9fb53 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -38,7 +38,9 @@ class KubernetesApiClient @@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token" @@TokenStr = nil @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + @@cpuRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i @@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + @@memoryRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i @@resourceLimitsTelemetryHash = {} def initialize @@ -518,29 +520,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle metricItems.push(metricProps) if isAddonResizerVPAEnabled() - if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNameSpace.eql?("kube-system") && containerName.eql?("omsagent") && metricCategory.eql?("limits")) - if metricNametoReturn == "cpuLimitNanoCores" - timeDifference = (DateTime.now.to_time.to_i - @@cpuLimitsTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) - @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProps = {} - telemetryProps["PodName"] = podName - telemetryProps["ContainerName"] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) - end - elsif metricNametoReturn == "memoryLimitBytes" - timeDifference = (DateTime.now.to_time.to_i - @@memoryLimitsTelemetryTimeTracker).abs - timeDifferenceInMinutes = timeDifference / 60 - if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) - @@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i - telemetryProps = {} - telemetryProps["PodName"] = podName - telemetryProps["ContainerName"] = containerName - ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) - end - end - end + sendReplicasetAgentRequestsAndLimitsTelemetry(podName, podNameSpace, containerName, metricNametoReturn, metricValue) end else #No container level limit for the given metric, so default to node level limit @@ -1430,5 +1410,47 @@ def isAddonResizerVPAEnabled end return isAddonResizerVPAEnabled end + + def sendReplicasetAgentRequestsAndLimitsTelemetry(podName, podNameSpace, containerName, metricName, metricValue) + begin + if (!podName.nil? && podName.downcase.start_with?("omsagent-rs-") && podNameSpace.eql?("kube-system") && containerName.eql?("omsagent")) + telemetryProps = {} + telemetryProps["PodName"] = podName + telemetryProps["ContainerName"] = containerName + case metricName + when "cpuLimitNanoCores" + timeDifference = (DateTime.now.to_time.to_i - @@cpuLimitsTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + ApplicationInsightsUtility.sendMetricTelemetry(metricName, metricValue, telemetryProps) + end + when "memoryLimitBytes" + timeDifference = (DateTime.now.to_time.to_i - @@memoryLimitsTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i + ApplicationInsightsUtility.sendMetricTelemetry(metricName, metricValue, telemetryProps) + end + when "cpuRequestNanoCores" + timeDifference = (DateTime.now.to_time.to_i - @@cpuRequestsTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@cpuRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i + ApplicationInsightsUtility.sendMetricTelemetry(metricName, metricValue, telemetryProps) + end + when "memoryRequestNanoCores" + timeDifference = (DateTime.now.to_time.to_i - @@memoryRequestsTelemetryTimeTracker).abs + timeDifferenceInMinutes = timeDifference / 60 + if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) + @@memoryRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i + ApplicationInsightsUtility.sendMetricTelemetry(metricName, metricValue, telemetryProps) + end + end + end + rescue => err + @Log.warn "KubernetesApiClient::sendReplicasetAgentRequestsAndLimitsTelemetry failed with an error: #{err}" + end + end end end From 32d549e38cc56b91b56f491d904502e41f15a99e Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Thu, 14 Jul 2022 15:52:59 -0700 Subject: [PATCH 19/20] fix bug --- source/plugins/ruby/KubernetesApiClient.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb index 501a9fb53..26dccceb4 100644 --- a/source/plugins/ruby/KubernetesApiClient.rb +++ b/source/plugins/ruby/KubernetesApiClient.rb @@ -1439,7 +1439,7 @@ def sendReplicasetAgentRequestsAndLimitsTelemetry(podName, podNameSpace, contain @@cpuRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i ApplicationInsightsUtility.sendMetricTelemetry(metricName, metricValue, telemetryProps) end - when "memoryRequestNanoCores" + when "memoryRequestBytes" timeDifference = (DateTime.now.to_time.to_i - @@memoryRequestsTelemetryTimeTracker).abs timeDifferenceInMinutes = timeDifference / 60 if (timeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES) From 7706a94aa2198b7f058ace718a986f426bde91f8 Mon Sep 17 00:00:00 2001 From: Ganga Mahesh Siddem Date: Mon, 25 Jul 2022 15:45:03 -0700 Subject: [PATCH 20/20] minor update --- kubernetes/omsagent.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 2fa8dad0d..9fae29923 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -843,10 +843,11 @@ spec: configMap: name: container-azm-ms-osmconfig optional: true - - name: omsagent-rs-vpa-config-volume - configMap: - name: omsagent-rs-vpa-config - optional: true + # Uncomment below lines to enable VPA + # - name: omsagent-rs-vpa-config-volume + # configMap: + # name: omsagent-rs-vpa-config + # optional: true --- apiVersion: apps/v1 kind: DaemonSet