Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions kubernetes/container-azm-ms-vpaconfig.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# ConfigMap "container-azm-ms-vpaconfig" in the kube-system namespace.
# Its single data key, NannyConfiguration, embeds a nannyconfig/v1alpha1 document with
# baseCPU 200m, cpuPerNode 2m, baseMemory 350Mi and memoryPerNode 4Mi.
# NOTE(review): presumably consumed by the addon-resizer (pod_nanny) sidecar through its
# --config-dir mount -- confirm. The commented-out volume in omsagent.yaml points at a
# ConfigMap named "omsagent-rs-vpa-config" and pod_nanny is started with --memory=300Mi,
# so verify that this name and the 350Mi baseMemory are the intended values.
kind: ConfigMap
apiVersion: v1
data:
NannyConfiguration: |-
apiVersion: nannyconfig/v1alpha1
kind: NannyConfiguration
baseCPU: 200m
cpuPerNode: 2m
baseMemory: 350Mi
memoryPerNode: 4Mi
metadata:
name: container-azm-ms-vpaconfig
namespace: kube-system
50 changes: 50 additions & 0 deletions kubernetes/omsagent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ rules:
- apiGroups: ["apps", "extensions", "autoscaling"]
resources: ["replicasets", "deployments", "horizontalpodautoscalers"]
verbs: ["list"]
# Uncomment the lines below if the AddonResizer VPA is enabled
# - apiGroups: ["apps"]
# resources: ["deployments"]
# resourceNames: [ "omsagent-rs" ]
# verbs: ["get", "patch"]
# Uncomment below lines for MSI Auth Mode testing
# - apiGroups: [""]
# resources: ["secrets"]
Expand Down Expand Up @@ -617,6 +622,42 @@ spec:
spec:
serviceAccountName: omsagent
containers:
# Uncomment below lines to enable VPA
# # Make sure this matches the version on the AKS RP side
# - image: "mcr.microsoft.com/oss/kubernetes/autoscaler/addon-resizer:1.8.14"
# imagePullPolicy: IfNotPresent
# name: omsagent-vpa
# resources:
# limits:
# cpu: 100m
# memory: 300Mi
# requests:
# cpu: 5m
# memory: 30Mi
# env:
# - name: MY_POD_NAME
# valueFrom:
# fieldRef:
# fieldPath: metadata.name
# - name: MY_POD_NAMESPACE
# valueFrom:
# fieldRef:
# fieldPath: metadata.namespace
# volumeMounts:
# - name: omsagent-rs-vpa-config-volume
# mountPath: /etc/config
# command:
# - /pod_nanny
# - --config-dir=/etc/config
# - --cpu=200m
# - --extra-cpu=2m
# - --memory=300Mi
# - --extra-memory=4Mi
# - --poll-period=180000
# - --threshold=5
# - --namespace=kube-system
# - --deployment=omsagent-rs
# - --container=omsagent
# Uncomment below lines for MSI Auth Mode testing
# - name: addon-token-adapter
# command:
Expand Down Expand Up @@ -655,6 +696,7 @@ spec:
- name: omsagent
image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod06272022-hotfix"
imagePullPolicy: IfNotPresent
# Comment out resources if VPA is configured, since the VPA will set these values
resources:
limits:
cpu: 1
Expand Down Expand Up @@ -695,6 +737,9 @@ spec:
# Uncomment below lines for MSI Auth Mode testing
# - name: USING_AAD_MSI_AUTH
# value: "true"
# Uncomment the lines below when the Addon-resizer VPA is enabled
# - name: RS_ADDON-RESIZER_VPA_ENABLED
# value: "true"
securityContext:
privileged: true
ports:
Expand Down Expand Up @@ -798,6 +843,11 @@ spec:
configMap:
name: container-azm-ms-osmconfig
optional: true
# Uncomment below lines to enable VPA
# - name: omsagent-rs-vpa-config-volume
# configMap:
# name: omsagent-rs-vpa-config
# optional: true
---
apiVersion: apps/v1
kind: DaemonSet
Expand Down
5 changes: 5 additions & 0 deletions source/plugins/ruby/ApplicationInsightsUtility.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class ApplicationInsightsUtility
@@EnvControllerType = "CONTROLLER_TYPE"
@@EnvContainerRuntime = "CONTAINER_RUNTIME"
@@EnvAADMSIAuthMode = "AAD_MSI_AUTH_MODE"
@@EnvAddonResizerVPAEnabled = "RS_ADDON-RESIZER_VPA_ENABLED"

@@isWindows = false
@@hostName = (OMS::Common.get_hostname)
Expand Down Expand Up @@ -93,6 +94,10 @@ def initializeUtility()
else
@@CustomProperties["aadAuthMSIMode"] = "false"
end
addonResizerVPAEnabled = ENV[@@EnvAddonResizerVPAEnabled]
if !addonResizerVPAEnabled.nil? && !addonResizerVPAEnabled.empty? && addonResizerVPAEnabled.downcase == "true".downcase
@@CustomProperties["addonResizerVPAEnabled"] = "true"
end
#Check if telemetry is turned off
telemetryOffSwitch = ENV["DISABLE_TELEMETRY"]
if telemetryOffSwitch && !telemetryOffSwitch.nil? && !telemetryOffSwitch.empty? && telemetryOffSwitch.downcase == "true".downcase
Expand Down
62 changes: 60 additions & 2 deletions source/plugins/ruby/KubernetesApiClient.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ class KubernetesApiClient
@Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M
@@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token"
@@TokenStr = nil
@@telemetryTimeTracker = DateTime.now.to_time.to_i
@@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i
@@cpuRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i
@@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i
@@memoryRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i
@@resourceLimitsTelemetryHash = {}

def initialize
Expand Down Expand Up @@ -470,6 +473,7 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle
if podUid.nil?
return metricItems
end
podName = pod["metadata"]["name"]

nodeName = ""
#for unscheduled (non-started) pods nodeName does NOT exist
Expand Down Expand Up @@ -514,8 +518,12 @@ def getContainerResourceRequestsAndLimits(pod, metricCategory, metricNameToColle
metricCollections.push(metricCollection)
metricProps["json_Collections"] = metricCollections.to_json
metricItems.push(metricProps)
#No container level limit for the given metric, so default to node level limit

if isAddonResizerVPAEnabled()
sendReplicasetAgentRequestsAndLimitsTelemetry(podName, podNameSpace, containerName, metricNametoReturn, metricValue)
end
else
#No container level limit for the given metric, so default to node level limit
if (metricCategory == "limits" && !nodeAllocatableRecord.nil? && !nodeAllocatableRecord.empty? && nodeAllocatableRecord.has_key?(metricNameToCollect))
metricValue = getMetricNumericValue(metricNameToCollect, nodeAllocatableRecord[metricNameToCollect])
metricProps = {}
Expand Down Expand Up @@ -1394,5 +1402,55 @@ def isEmitCacheTelemetry
end
return isEmitCacheTelemtryEnabled
end

# Tells whether the addon-resizer VPA feature flag is switched on.
# The flag is read from the RS_ADDON-RESIZER_VPA_ENABLED environment variable and
# counts as enabled only when it is set, non-empty and equal to "true" ignoring case.
# Returns true or false.
def isAddonResizerVPAEnabled
  vpaFlag = ENV["RS_ADDON-RESIZER_VPA_ENABLED"]
  # Unset or blank means the feature is off.
  return false if vpaFlag.nil? || vpaFlag.empty?

  vpaFlag.downcase == "true".downcase
end

# Emits a requests/limits metric as telemetry for the replicaset agent container.
#
# Only the "omsagent" container of a pod whose name starts with "omsagent-rs-"
# (case-insensitive) in the "kube-system" namespace is reported; any other input is ignored.
# Each of the four supported metric names (cpuLimitNanoCores, memoryLimitBytes,
# cpuRequestNanoCores, memoryRequestBytes) has its own last-sent tracker, and a metric is
# sent only once at least Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES have elapsed
# since that tracker was last updated. Unrecognized metric names are ignored.
#
# podName       - name of the pod owning the container (may be nil)
# podNameSpace  - namespace of the pod
# containerName - name of the container the metric belongs to
# metricName    - telemetry metric name; also selects which tracker is consulted
# metricValue   - value forwarded to ApplicationInsightsUtility.sendMetricTelemetry
#
# Any StandardError is logged as a warning instead of being raised.
def sendReplicasetAgentRequestsAndLimitsTelemetry(podName, podNameSpace, containerName, metricName, metricValue)
  isReplicasetAgentContainer = !podName.nil? &&
                               podName.downcase.start_with?("omsagent-rs-") &&
                               podNameSpace.eql?("kube-system") &&
                               containerName.eql?("omsagent")
  return unless isReplicasetAgentContainer

  properties = { "PodName" => podName, "ContainerName" => containerName }

  # True when the flush interval has passed since the given epoch-seconds timestamp.
  # abs keeps the check meaningful even if the clock moved backwards.
  flushDue = lambda do |lastSentAt|
    elapsedSeconds = (DateTime.now.to_time.to_i - lastSentAt).abs
    (elapsedSeconds / 60) >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES
  end
  emitMetric = lambda do
    ApplicationInsightsUtility.sendMetricTelemetry(metricName, metricValue, properties)
  end

  # The tracker is refreshed before sending, mirroring the per-metric throttle state.
  case metricName
  when "cpuLimitNanoCores"
    if flushDue.call(@@cpuLimitsTelemetryTimeTracker)
      @@cpuLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i
      emitMetric.call
    end
  when "memoryLimitBytes"
    if flushDue.call(@@memoryLimitsTelemetryTimeTracker)
      @@memoryLimitsTelemetryTimeTracker = DateTime.now.to_time.to_i
      emitMetric.call
    end
  when "cpuRequestNanoCores"
    if flushDue.call(@@cpuRequestsTelemetryTimeTracker)
      @@cpuRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i
      emitMetric.call
    end
  when "memoryRequestBytes"
    if flushDue.call(@@memoryRequestsTelemetryTimeTracker)
      @@memoryRequestsTelemetryTimeTracker = DateTime.now.to_time.to_i
      emitMetric.call
    end
  end
rescue => err
  @Log.warn "KubernetesApiClient::sendReplicasetAgentRequestsAndLimitsTelemetry failed with an error: #{err}"
end
end
end