From 71af792d17232c725bb7e0c80369daa98f2533c7 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Mon, 1 Nov 2021 13:12:11 -0700 Subject: [PATCH 1/7] fix: change default value to a number so that subtraction happens correctly --- source/plugins/ruby/kubelet_utils.rb | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/source/plugins/ruby/kubelet_utils.rb b/source/plugins/ruby/kubelet_utils.rb index e31407b54..ce18d4ddd 100644 --- a/source/plugins/ruby/kubelet_utils.rb +++ b/source/plugins/ruby/kubelet_utils.rb @@ -56,73 +56,73 @@ def get_node_allocatable(cpu_capacity, memory_capacity) begin kubereserved_cpu = parsed_response["kubeletconfig"]["kubeReserved"]["cpu"] if kubereserved_cpu.nil? || kubereserved_cpu == "" - kubereserved_cpu = "0" + kubereserved_cpu = 0 end @log.info "get_node_allocatable::kubereserved_cpu #{kubereserved_cpu}" rescue => errorStr @log.error "Error in get_node_allocatable::kubereserved_cpu: #{errorStr}" - kubereserved_cpu = "0" + kubereserved_cpu = 0 ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") end begin kubereserved_memory = parsed_response["kubeletconfig"]["kubeReserved"]["memory"] if kubereserved_memory.nil? || kubereserved_memory == "" - kubereserved_memory = "0" + kubereserved_memory = 0 end @log.info "get_node_allocatable::kubereserved_memory #{kubereserved_memory}" rescue => errorStr @log.error "Error in get_node_allocatable::kubereserved_memory: #{errorStr}" - kubereserved_memory = "0" + kubereserved_memory = 0 ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") end begin systemReserved_cpu = parsed_response["kubeletconfig"]["systemReserved"]["cpu"] if systemReserved_cpu.nil? 
|| systemReserved_cpu == "" - systemReserved_cpu = "0" + systemReserved_cpu = 0 end @log.info "get_node_allocatable::systemReserved_cpu #{systemReserved_cpu}" rescue => errorStr # this will likely always reach this condition for AKS ~ only applicable for hyrid + MDM combination @log.error "Error in get_node_allocatable::systemReserved_cpu: #{errorStr}" - systemReserved_cpu = "0" + systemReserved_cpu = 0 ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") end begin explicitlyReserved_cpu = parsed_response["kubeletconfig"]["reservedCPUs"] if explicitlyReserved_cpu.nil? || explicitlyReserved_cpu == "" - explicitlyReserved_cpu = "0" + explicitlyReserved_cpu = 0 end @log.info "get_node_allocatable::explicitlyReserved_cpu #{explicitlyReserved_cpu}" rescue => errorStr # this will likely always reach this condition for AKS ~ only applicable for hyrid + MDM combination @log.error "Error in get_node_allocatable::explicitlyReserved_cpu: #{errorStr}" - explicitlyReserved_cpu = "0" + explicitlyReserved_cpu = 0 ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::explicitlyReserved_cpu: #{errorStr}") end begin systemReserved_memory = parsed_response["kubeletconfig"]["systemReserved"]["memory"] if systemReserved_memory.nil? || systemReserved_memory == "" - systemReserved_memory = "0" + systemReserved_memory = 0 end @log.info "get_node_allocatable::systemReserved_memory #{systemReserved_memory}" rescue => errorStr @log.error "Error in get_node_allocatable::systemReserved_memory: #{errorStr}" - systemReserved_memory = "0" + systemReserved_memory = 0 ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") end begin evictionHard_memory = parsed_response["kubeletconfig"]["evictionHard"]["memory.available"] if evictionHard_memory.nil? 
|| evictionHard_memory == "" - evictionHard_memory = "0" + evictionHard_memory = 0 end @log.info "get_node_allocatable::evictionHard_memory #{evictionHard_memory}" rescue => errorStr @log.error "Error in get_node_allocatable::evictionHard_memory: #{errorStr}" - evictionHard_memory = "0" + evictionHard_memory = 0 ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") end From db0bf4853e9249d12a9e288d0e9d955918c7afea Mon Sep 17 00:00:00 2001 From: bragi92 Date: Wed, 3 Nov 2021 12:25:42 -0700 Subject: [PATCH 2/7] update fluentd version to 1.14.2 --- .github/workflows/run_unit_tests.yml | 2 +- kubernetes/linux/setup.sh | 2 +- kubernetes/windows/Dockerfile | 2 +- kubernetes/windows/Dockerfile-dev-base-image | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index 94ac4371a..435de91e8 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -26,7 +26,7 @@ jobs: uses: actions/checkout@v2 - name: install fluent run: | - sudo gem install fluentd -v "1.12.2" --no-document + sudo gem install fluentd -v "1.14.2" --no-document sudo fluentd --setup ./fluent - name: Run unit tests run: | diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh index 243677dd0..5bddfc604 100644 --- a/kubernetes/linux/setup.sh +++ b/kubernetes/linux/setup.sh @@ -52,7 +52,7 @@ sudo echo "deb http://ppa.launchpad.net/brightbox/ruby-ng/ubuntu bionic main" >> sudo apt-get update sudo apt-get install ruby2.6 ruby2.6-dev gcc make -y # fluentd v1 gem -gem install fluentd -v "1.12.2" --no-document +gem install fluentd -v "1.14.2" --no-document fluentd --setup ./fluent gem install gyoku iso8601 --no-doc diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile index 0ddf67ab2..41ad7e7ba 100644 --- a/kubernetes/windows/Dockerfile +++ b/kubernetes/windows/Dockerfile @@ -20,7 +20,7 @@ RUN 
refreshenv \ && gem install cool.io -v 1.5.4 --platform ruby \ && gem install oj -v 3.3.10 \ && gem install json -v 2.2.0 \ -&& gem install fluentd -v 1.12.2 \ +&& gem install fluentd -v 1.14.2 \ && gem install win32-service -v 1.0.1 \ && gem install win32-ipc -v 0.7.0 \ && gem install win32-event -v 0.6.3 \ diff --git a/kubernetes/windows/Dockerfile-dev-base-image b/kubernetes/windows/Dockerfile-dev-base-image index 0081f9c53..501fead89 100644 --- a/kubernetes/windows/Dockerfile-dev-base-image +++ b/kubernetes/windows/Dockerfile-dev-base-image @@ -18,7 +18,7 @@ RUN refreshenv \ && gem install cool.io -v 1.5.4 --platform ruby \ && gem install oj -v 3.3.10 \ && gem install json -v 2.2.0 \ -&& gem install fluentd -v 1.12.2 \ +&& gem install fluentd -v 1.14.2 \ && gem install win32-service -v 1.0.1 \ && gem install win32-ipc -v 0.7.0 \ && gem install win32-event -v 0.6.3 \ From 1a8ef70ba4ccf0a4afc12a3dd240eb86eb9e2f66 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Wed, 3 Nov 2021 14:44:45 -0700 Subject: [PATCH 3/7] extra end statement --- build/common/installer/scripts/tomlparser.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/build/common/installer/scripts/tomlparser.rb b/build/common/installer/scripts/tomlparser.rb index 32ea09aa3..03b470205 100644 --- a/build/common/installer/scripts/tomlparser.rb +++ b/build/common/installer/scripts/tomlparser.rb @@ -191,8 +191,6 @@ def populateSettingValuesFromConfigMap(parsedConfig) rescue => errorStr ConfigParseErrorLogger.logError("Exception while reading config map settings for adx database name - #{errorStr}, using default #{@adxDatabaseName}, please check config map for errors") end - - end end end From df0eb1c96506df0d3170f530972fd0124dc0e175 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Wed, 3 Nov 2021 19:14:53 -0700 Subject: [PATCH 4/7] safely set to float --- source/plugins/ruby/kubelet_utils.rb | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git 
a/source/plugins/ruby/kubelet_utils.rb b/source/plugins/ruby/kubelet_utils.rb index ce18d4ddd..ca2fa6d96 100644 --- a/source/plugins/ruby/kubelet_utils.rb +++ b/source/plugins/ruby/kubelet_utils.rb @@ -47,6 +47,9 @@ def get_node_allocatable(cpu_capacity, memory_capacity) @log.error "kubelet_utils.rb::get_node_allocatble - cpu_capacity or memory_capacity values not set. Hence we cannot calculate allocatable values" end + cpu_capacity = BigDecimal(cpu_capacity).to_f + memory_capacity = BigDecimal(memory_capacity).to_f + cpu_allocatable = 1.0 memory_allocatable = 1.0 @@ -56,74 +59,68 @@ def get_node_allocatable(cpu_capacity, memory_capacity) begin kubereserved_cpu = parsed_response["kubeletconfig"]["kubeReserved"]["cpu"] if kubereserved_cpu.nil? || kubereserved_cpu == "" - kubereserved_cpu = 0 + kubereserved_cpu = "0.0" end @log.info "get_node_allocatable::kubereserved_cpu #{kubereserved_cpu}" rescue => errorStr @log.error "Error in get_node_allocatable::kubereserved_cpu: #{errorStr}" - kubereserved_cpu = 0 - ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") + kubereserved_cpu = "0.0" end begin kubereserved_memory = parsed_response["kubeletconfig"]["kubeReserved"]["memory"] if kubereserved_memory.nil? || kubereserved_memory == "" - kubereserved_memory = 0 + kubereserved_memory = "0.0" end @log.info "get_node_allocatable::kubereserved_memory #{kubereserved_memory}" rescue => errorStr @log.error "Error in get_node_allocatable::kubereserved_memory: #{errorStr}" - kubereserved_memory = 0 - ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") + kubereserved_memory = "0.0" end begin systemReserved_cpu = parsed_response["kubeletconfig"]["systemReserved"]["cpu"] if systemReserved_cpu.nil? 
|| systemReserved_cpu == "" - systemReserved_cpu = 0 + systemReserved_cpu = "0.0" end @log.info "get_node_allocatable::systemReserved_cpu #{systemReserved_cpu}" rescue => errorStr # this will likely always reach this condition for AKS ~ only applicable for hyrid + MDM combination @log.error "Error in get_node_allocatable::systemReserved_cpu: #{errorStr}" - systemReserved_cpu = 0 - ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") + systemReserved_cpu = "0.0" end begin explicitlyReserved_cpu = parsed_response["kubeletconfig"]["reservedCPUs"] if explicitlyReserved_cpu.nil? || explicitlyReserved_cpu == "" - explicitlyReserved_cpu = 0 + explicitlyReserved_cpu = "0.0" end @log.info "get_node_allocatable::explicitlyReserved_cpu #{explicitlyReserved_cpu}" rescue => errorStr # this will likely always reach this condition for AKS ~ only applicable for hyrid + MDM combination @log.error "Error in get_node_allocatable::explicitlyReserved_cpu: #{errorStr}" - explicitlyReserved_cpu = 0 - ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::explicitlyReserved_cpu: #{errorStr}") + explicitlyReserved_cpu = "0.0" end begin systemReserved_memory = parsed_response["kubeletconfig"]["systemReserved"]["memory"] if systemReserved_memory.nil? || systemReserved_memory == "" - systemReserved_memory = 0 + systemReserved_memory = "0.0" end @log.info "get_node_allocatable::systemReserved_memory #{systemReserved_memory}" rescue => errorStr @log.error "Error in get_node_allocatable::systemReserved_memory: #{errorStr}" - systemReserved_memory = 0 - ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") + systemReserved_memory = "0.0" end begin evictionHard_memory = parsed_response["kubeletconfig"]["evictionHard"]["memory.available"] if evictionHard_memory.nil? 
|| evictionHard_memory == "" - evictionHard_memory = 0 + evictionHard_memory = "0.0" end @log.info "get_node_allocatable::evictionHard_memory #{evictionHard_memory}" rescue => errorStr @log.error "Error in get_node_allocatable::evictionHard_memory: #{errorStr}" - evictionHard_memory = 0 - ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") + evictionHard_memory = "0.0" end # do calculation in nanocore since that's what KubernetesApiClient.getMetricNumericValue expects @@ -142,6 +139,9 @@ def get_node_allocatable(cpu_capacity, memory_capacity) memory_allocatable = memory_capacity - (KubernetesApiClient.getMetricNumericValue("memory", kubereserved_memory) + KubernetesApiClient.getMetricNumericValue("memory", systemReserved_memory) + KubernetesApiClient.getMetricNumericValue("memory", evictionHard_memory)) @log.info "Memory Allocatable #{memory_allocatable}" + cpu_allocatable = BigDecimal(cpu_allocatable).to_f + memory_allocatable = BigDecimal(memory_allocatable).to_f + return [cpu_allocatable, memory_allocatable] rescue => errorStr @log.info "Error get_node_allocatable: #{errorStr}" From 1947e5a6bb25a5a6fdac0d7f57c6bbe289c9d50a Mon Sep 17 00:00:00 2001 From: bragi92 Date: Wed, 3 Nov 2021 20:56:13 -0700 Subject: [PATCH 5/7] big decimal precision --- kubernetes/omsagent.yaml | 36 ++++++++++++++-------------- source/plugins/ruby/kubelet_utils.rb | 12 ++++++---- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 66f8c4010..7015de834 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -333,8 +333,8 @@ metadata: type: Opaque data: #BASE64 ENCODED (Both WSID & KEY) INSIDE DOUBLE QUOTE ("") - WSID: "VALUE_WSID" - KEY: "VALUE_KEY" + WSID: "YzcwZDMwMjUtZDhkMi00ZTI1LWI4YTUtNDFiMzhhMzFhN2Ey" + KEY: "ZHpFR2poeEJwK090TVBTMVVjeVMxSWdsVk93ZHJNSWdKdXZLNGJnOXBwSlluSWEyUFVMNDdVaWlOVUtIOExEZWxWVG81S1JwUVFKTDQxMzhqMnlmN2c9PQ==" --- 
apiVersion: apps/v1 kind: DaemonSet @@ -368,7 +368,7 @@ spec: value: "3" containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10132021" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:kaveeshcidevtest11032021-2" imagePullPolicy: IfNotPresent resources: limits: @@ -386,9 +386,9 @@ spec: value: "1" # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these - name: AKS_RESOURCE_ID - value: "VALUE_AKS_RESOURCE_ID_VALUE" + value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" - name: AKS_REGION - value: "VALUE_AKS_RESOURCE_REGION_VALUE" + value: "eastus" # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests - name: ISTEST value: "true" @@ -403,7 +403,7 @@ spec: fieldPath: status.hostIP # Update this with the user assigned msi client id for omsagent - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" + value: "75cb887f-ce49-40c1-b8a2-01797b893da0" - name: AZMON_CONTAINERLOGS_ONEAGENT_REGIONS value: "koreacentral,norwayeast,eastus2" - name: USING_AAD_MSI_AUTH @@ -454,7 +454,7 @@ spec: timeoutSeconds: 15 #Only in sidecar scraping mode # - name: omsagent-prometheus - # image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10132021" + # image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:kaveeshcidevtest11032021-2" # imagePullPolicy: IfNotPresent # resources: # limits: @@ -468,9 +468,9 @@ spec: # - name: AKS_CLUSTER_NAME # value: "VALUE_AKS_CLUSTER_NAME" # - name: AKS_RESOURCE_ID - # value: "VALUE_AKS_RESOURCE_ID_VALUE" + # value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" # - name: AKS_REGION - # value: "VALUE_AKS_RESOURCE_REGION_VALUE" + # value: "eastus" # - name: 
AKS_NODE_RESOURCE_GROUP # value: "VALUE_AKS_NODE_RESOURCE_GROUP" # #Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters @@ -486,7 +486,7 @@ spec: # fieldPath: status.hostIP # # Update this with the user assigned msi client id for omsagent # - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - # value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" + # value: "75cb887f-ce49-40c1-b8a2-01797b893da0" # - name: USING_AAD_MSI_AUTH # value: "false" # securityContext: @@ -603,7 +603,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10132021" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:kaveeshcidevtest11032021-2" imagePullPolicy: IfNotPresent resources: limits: @@ -614,9 +614,9 @@ spec: memory: 250Mi env: - name: AKS_RESOURCE_ID - value: "VALUE_AKS_RESOURCE_ID_VALUE" + value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" - name: AKS_REGION - value: "VALUE_AKS_RESOURCE_REGION_VALUE" + value: "eastus" # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests - name: ISTEST value: "true" @@ -631,7 +631,7 @@ spec: fieldPath: status.hostIP # Update this with the user assigned msi client id for omsagent - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" + value: "75cb887f-ce49-40c1-b8a2-01797b893da0" # Add the below environment variable to true only in sidecar enabled regions, else set it to false - name: SIDECAR_SCRAPING_ENABLED value: "false" @@ -776,7 +776,7 @@ spec: value: "3" containers: - name: omsagent-win - image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10132021" + image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:win-kaveeshcidevtest11032021-2" imagePullPolicy: 
IfNotPresent resources: limits: @@ -785,9 +785,9 @@ spec: env: # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these - name: AKS_RESOURCE_ID - value: "VALUE_AKS_RESOURCE_ID_VALUE" + value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" - name: AKS_REGION - value: "VALUE_AKS_RESOURCE_REGION_VALUE" + value: "eastus" #- name: ACS_RESOURCE_NAME # value: "my_acs_cluster_name" - name: CONTROLLER_TYPE @@ -808,7 +808,7 @@ spec: value: "false" # Update this with the user assigned msi client id for omsagent - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" + value: "75cb887f-ce49-40c1-b8a2-01797b893da0" # Add this only for clouds that require cert bootstrapping # - name: REQUIRES_CERT_BOOTSTRAP # value: "true" diff --git a/source/plugins/ruby/kubelet_utils.rb b/source/plugins/ruby/kubelet_utils.rb index ca2fa6d96..d26684799 100644 --- a/source/plugins/ruby/kubelet_utils.rb +++ b/source/plugins/ruby/kubelet_utils.rb @@ -47,8 +47,8 @@ def get_node_allocatable(cpu_capacity, memory_capacity) @log.error "kubelet_utils.rb::get_node_allocatble - cpu_capacity or memory_capacity values not set. 
Hence we cannot calculate allocatable values" end - cpu_capacity = BigDecimal(cpu_capacity).to_f - memory_capacity = BigDecimal(memory_capacity).to_f + cpu_capacity = BigDecimal(cpu_capacity, 2).to_f + memory_capacity = BigDecimal(memory_capacity, 2).to_f cpu_allocatable = 1.0 memory_allocatable = 1.0 @@ -134,13 +134,15 @@ def get_node_allocatable(cpu_capacity, memory_capacity) end # convert back to units similar to what we get for capacity cpu_allocatable = cpu_allocatable / (1000.0 ** 2) - @log.info "CPU Allocatable #{cpu_allocatable}" memory_allocatable = memory_capacity - (KubernetesApiClient.getMetricNumericValue("memory", kubereserved_memory) + KubernetesApiClient.getMetricNumericValue("memory", systemReserved_memory) + KubernetesApiClient.getMetricNumericValue("memory", evictionHard_memory)) + + cpu_allocatable = BigDecimal(cpu_allocatable, 2).to_f + memory_allocatable = BigDecimal(memory_allocatable, 2).to_f + + @log.info "CPU Allocatable #{cpu_allocatable}" @log.info "Memory Allocatable #{memory_allocatable}" - cpu_allocatable = BigDecimal(cpu_allocatable).to_f - memory_allocatable = BigDecimal(memory_allocatable).to_f return [cpu_allocatable, memory_allocatable] rescue => errorStr From aba4614a85adcd73c49b16684adfff97c065fb35 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Wed, 3 Nov 2021 20:58:54 -0700 Subject: [PATCH 6/7] revert omsagent --- kubernetes/omsagent.yaml | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml index 7015de834..a1a843196 100644 --- a/kubernetes/omsagent.yaml +++ b/kubernetes/omsagent.yaml @@ -333,8 +333,8 @@ metadata: type: Opaque data: #BASE64 ENCODED (Both WSID & KEY) INSIDE DOUBLE QUOTE ("") - WSID: "YzcwZDMwMjUtZDhkMi00ZTI1LWI4YTUtNDFiMzhhMzFhN2Ey" - KEY: "ZHpFR2poeEJwK090TVBTMVVjeVMxSWdsVk93ZHJNSWdKdXZLNGJnOXBwSlluSWEyUFVMNDdVaWlOVUtIOExEZWxWVG81S1JwUVFKTDQxMzhqMnlmN2c9PQ==" + WSID: "VALUE_WSID" + KEY: "VALUE_KEY" --- 
apiVersion: apps/v1 kind: DaemonSet @@ -368,7 +368,7 @@ spec: value: "3" containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:kaveeshcidevtest11032021-2" + image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10132021" imagePullPolicy: IfNotPresent resources: limits: @@ -386,9 +386,9 @@ spec: value: "1" # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these - name: AKS_RESOURCE_ID - value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" + value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION - value: "eastus" + value: "VALUE_AKS_RESOURCE_REGION_VALUE" # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests - name: ISTEST value: "true" @@ -403,7 +403,7 @@ spec: fieldPath: status.hostIP # Update this with the user assigned msi client id for omsagent - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "75cb887f-ce49-40c1-b8a2-01797b893da0" + value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" - name: AZMON_CONTAINERLOGS_ONEAGENT_REGIONS value: "koreacentral,norwayeast,eastus2" - name: USING_AAD_MSI_AUTH @@ -454,7 +454,7 @@ spec: timeoutSeconds: 15 #Only in sidecar scraping mode # - name: omsagent-prometheus - # image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:kaveeshcidevtest11032021-2" + # image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10132021" # imagePullPolicy: IfNotPresent # resources: # limits: @@ -468,9 +468,9 @@ spec: # - name: AKS_CLUSTER_NAME # value: "VALUE_AKS_CLUSTER_NAME" # - name: AKS_RESOURCE_ID - # value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" + # value: "VALUE_AKS_RESOURCE_ID_VALUE" # - name: AKS_REGION - # value: "eastus" + # value: "VALUE_AKS_RESOURCE_REGION_VALUE" # - name: 
AKS_NODE_RESOURCE_GROUP # value: "VALUE_AKS_NODE_RESOURCE_GROUP" # #Uncomment below two lines for ACS clusters and set the cluster names manually. Also comment out the above two lines for ACS clusters @@ -486,7 +486,7 @@ spec: # fieldPath: status.hostIP # # Update this with the user assigned msi client id for omsagent # - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - # value: "75cb887f-ce49-40c1-b8a2-01797b893da0" + # value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" # - name: USING_AAD_MSI_AUTH # value: "false" # securityContext: @@ -603,7 +603,7 @@ spec: serviceAccountName: omsagent containers: - name: omsagent - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:kaveeshcidevtest11032021-2" + image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod10132021" imagePullPolicy: IfNotPresent resources: limits: @@ -614,9 +614,9 @@ spec: memory: 250Mi env: - name: AKS_RESOURCE_ID - value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" + value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION - value: "eastus" + value: "VALUE_AKS_RESOURCE_REGION_VALUE" # this used for e2e test and setting this just emits some additional log statements which used for the e2e tests - name: ISTEST value: "true" @@ -631,7 +631,7 @@ spec: fieldPath: status.hostIP # Update this with the user assigned msi client id for omsagent - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "75cb887f-ce49-40c1-b8a2-01797b893da0" + value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" # Add the below environment variable to true only in sidecar enabled regions, else set it to false - name: SIDECAR_SCRAPING_ENABLED value: "false" @@ -776,7 +776,7 @@ spec: value: "3" containers: - name: omsagent-win - image: "mcr.microsoft.com/azuremonitor/containerinsights/cidev:win-kaveeshcidevtest11032021-2" + image: "mcr.microsoft.com/azuremonitor/containerinsights/ciprod:win-ciprod10132021" imagePullPolicy: 
IfNotPresent resources: limits: @@ -785,9 +785,9 @@ spec: env: # azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these - name: AKS_RESOURCE_ID - value: "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourcegroups/mistborn/providers/Microsoft.ContainerService/managedClusters/vin" + value: "VALUE_AKS_RESOURCE_ID_VALUE" - name: AKS_REGION - value: "eastus" + value: "VALUE_AKS_RESOURCE_REGION_VALUE" #- name: ACS_RESOURCE_NAME # value: "my_acs_cluster_name" - name: CONTROLLER_TYPE @@ -808,7 +808,7 @@ spec: value: "false" # Update this with the user assigned msi client id for omsagent - name: USER_ASSIGNED_IDENTITY_CLIENT_ID - value: "75cb887f-ce49-40c1-b8a2-01797b893da0" + value: "VALUE_USER_ASSIGNED_IDENTITY_CLIENT_ID_VALUE" # Add this only for clouds that require cert bootstrapping # - name: REQUIRES_CERT_BOOTSTRAP # value: "true" @@ -927,3 +927,4 @@ spec: names: plural: healthstates kind: HealthState + \ No newline at end of file From 8b5df0a22d3ef923a09f21669b371353bac5bff3 Mon Sep 17 00:00:00 2001 From: bragi92 Date: Thu, 4 Nov 2021 17:16:58 -0700 Subject: [PATCH 7/7] keep telemetry --- source/plugins/ruby/kubelet_utils.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/plugins/ruby/kubelet_utils.rb b/source/plugins/ruby/kubelet_utils.rb index d26684799..368ca8639 100644 --- a/source/plugins/ruby/kubelet_utils.rb +++ b/source/plugins/ruby/kubelet_utils.rb @@ -65,6 +65,7 @@ def get_node_allocatable(cpu_capacity, memory_capacity) rescue => errorStr @log.error "Error in get_node_allocatable::kubereserved_cpu: #{errorStr}" kubereserved_cpu = "0.0" + ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_cpu: #{errorStr}") end begin @@ -76,6 +77,7 @@ def get_node_allocatable(cpu_capacity, memory_capacity) rescue => errorStr @log.error "Error in get_node_allocatable::kubereserved_memory: #{errorStr}" kubereserved_memory = "0.0" + 
ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::kubereserved_memory: #{errorStr}") end begin systemReserved_cpu = parsed_response["kubeletconfig"]["systemReserved"]["cpu"] @@ -87,6 +89,7 @@ def get_node_allocatable(cpu_capacity, memory_capacity) # this will likely always reach this condition for AKS ~ only applicable for hyrid + MDM combination @log.error "Error in get_node_allocatable::systemReserved_cpu: #{errorStr}" systemReserved_cpu = "0.0" + ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::systemReserved_cpu: #{errorStr}") end begin @@ -99,6 +102,7 @@ def get_node_allocatable(cpu_capacity, memory_capacity) # this will likely always reach this condition for AKS ~ only applicable for hyrid + MDM combination @log.error "Error in get_node_allocatable::explicitlyReserved_cpu: #{errorStr}" explicitlyReserved_cpu = "0.0" + ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::explicitlyReserved_cpu: #{errorStr}") end begin @@ -110,6 +114,7 @@ def get_node_allocatable(cpu_capacity, memory_capacity) rescue => errorStr @log.error "Error in get_node_allocatable::systemReserved_memory: #{errorStr}" systemReserved_memory = "0.0" + ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::systemReserved_memory: #{errorStr}") end begin @@ -121,6 +126,7 @@ def get_node_allocatable(cpu_capacity, memory_capacity) rescue => errorStr @log.error "Error in get_node_allocatable::evictionHard_memory: #{errorStr}" evictionHard_memory = "0.0" + ApplicationInsightsUtility.sendExceptionTelemetry("Error in get_node_allocatable::evictionHard_memory: #{errorStr}") end # do calculation in nanocore since that's what KubernetesApiClient.getMetricNumericValue expects @@ -143,7 +149,6 @@ def get_node_allocatable(cpu_capacity, memory_capacity) @log.info "CPU Allocatable #{cpu_allocatable}" @log.info "Memory Allocatable #{memory_allocatable}" - return [cpu_allocatable, 
memory_allocatable] rescue => errorStr @log.info "Error get_node_allocatable: #{errorStr}"