diff --git a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb b/build/common/installer/scripts/tomlparser-mdm-metrics-config.rb
similarity index 75%
rename from build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
rename to build/common/installer/scripts/tomlparser-mdm-metrics-config.rb
index dcf179bf2..b6a4419cf 100644
--- a/build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb
+++ b/build/common/installer/scripts/tomlparser-mdm-metrics-config.rb
@@ -1,9 +1,16 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-require_relative "tomlrb"
-require_relative "ConfigParseErrorLogger"
+# Use require_relative on Linux and require on Windows, since tomlrb is installed as a gem on Windows
+@os_type = ENV["OS_TYPE"]
+if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ require "tomlrb"
+else
+ require_relative "tomlrb"
+end
+
require_relative "/etc/fluent/plugin/constants"
+require_relative "ConfigParseErrorLogger"
@configMapMountPath = "/etc/config/settings/alertable-metrics-configuration-settings"
@configVersion = ""
@@ -124,6 +131,10 @@ def populateSettingValuesFromConfigMap(parsedConfig)
end
end
+def get_command_windows(env_variable_name, env_variable_value)
+ return "[System.Environment]::SetEnvironmentVariable(\"#{env_variable_name}\", \"#{env_variable_value}\", \"Process\")" + "\n" + "[System.Environment]::SetEnvironmentVariable(\"#{env_variable_name}\", \"#{env_variable_value}\", \"Machine\")" + "\n"
+end
+
@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
puts "****************Start MDM Metrics Config Processing********************"
if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it
@@ -137,19 +148,37 @@ def populateSettingValuesFromConfigMap(parsedConfig)
end
end
-# Write the settings to file, so that they can be set as environment variables
-file = File.open("config_mdm_metrics_env_var", "w")
+if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ # Write the settings to file, so that they can be set as environment variables in windows container
+ file = File.open("setmdmenv.ps1", "w")
-if !file.nil?
- file.write("export AZMON_ALERT_CONTAINER_CPU_THRESHOLD=#{@percentageCpuUsageThreshold}\n")
- file.write("export AZMON_ALERT_CONTAINER_MEMORY_RSS_THRESHOLD=#{@percentageMemoryRssThreshold}\n")
- file.write("export AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD=\"#{@percentageMemoryWorkingSetThreshold}\"\n")
- file.write("export AZMON_ALERT_PV_USAGE_THRESHOLD=#{@percentagePVUsageThreshold}\n")
- file.write("export AZMON_ALERT_JOB_COMPLETION_TIME_THRESHOLD=#{@jobCompletionThresholdMinutes}\n")
- # Close file after writing all MDM setting environment variables
- file.close
- puts "****************End MDM Metrics Config Processing********************"
+ if !file.nil?
+ commands = get_command_windows("AZMON_ALERT_CONTAINER_CPU_THRESHOLD", @percentageCpuUsageThreshold)
+ file.write(commands)
+ commands = get_command_windows("AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD", @percentageMemoryWorkingSetThreshold)
+ file.write(commands)
+ # Close file after writing all environment variables
+ file.close
+ puts "****************End MDM Metrics Config Processing********************"
+ else
+ puts "Exception while opening file for writing MDM metric config environment variables"
+ puts "****************End MDM Metrics Config Processing********************"
+ end
else
- puts "Exception while opening file for writing MDM metric config environment variables"
- puts "****************End MDM Metrics Config Processing********************"
+ # Write the settings to file, so that they can be set as environment variables in linux container
+ file = File.open("config_mdm_metrics_env_var", "w")
+
+ if !file.nil?
+ file.write("export AZMON_ALERT_CONTAINER_CPU_THRESHOLD=#{@percentageCpuUsageThreshold}\n")
+ file.write("export AZMON_ALERT_CONTAINER_MEMORY_RSS_THRESHOLD=#{@percentageMemoryRssThreshold}\n")
+ file.write("export AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD=\"#{@percentageMemoryWorkingSetThreshold}\"\n")
+ file.write("export AZMON_ALERT_PV_USAGE_THRESHOLD=#{@percentagePVUsageThreshold}\n")
+ file.write("export AZMON_ALERT_JOB_COMPLETION_TIME_THRESHOLD=#{@jobCompletionThresholdMinutes}\n")
+ # Close file after writing all MDM setting environment variables
+ file.close
+ puts "****************End MDM Metrics Config Processing********************"
+ else
+ puts "Exception while opening file for writing MDM metric config environment variables"
+ puts "****************End MDM Metrics Config Processing********************"
+ end
end
diff --git a/build/linux/installer/datafiles/base_container.data b/build/linux/installer/datafiles/base_container.data
index b9f889dba..de8ccbba0 100644
--- a/build/linux/installer/datafiles/base_container.data
+++ b/build/linux/installer/datafiles/base_container.data
@@ -42,7 +42,7 @@ MAINTAINER: 'Microsoft Corporation'
/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; build/linux/installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root
/opt/livenessprobe.sh; build/linux/installer/scripts/livenessprobe.sh; 755; root; root
/opt/tomlparser-prom-customconfig.rb; build/common/installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root
-/opt/tomlparser-mdm-metrics-config.rb; build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root
+/opt/tomlparser-mdm-metrics-config.rb; build/common/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root
/opt/tomlparser-metric-collection-config.rb; build/linux/installer/scripts/tomlparser-metric-collection-config.rb; 755; root; root
diff --git a/build/windows/Makefile.ps1 b/build/windows/Makefile.ps1
index 2d49330ea..737abc92a 100644
--- a/build/windows/Makefile.ps1
+++ b/build/windows/Makefile.ps1
@@ -180,4 +180,14 @@ $exclude = @('*.cs','*.csproj')
Copy-Item -Path $installerdir -Destination $publishdir -Recurse -Force -Exclude $exclude
Write-Host("successfully copied installer files conf and scripts from :" + $installerdir + " to :" + $publishdir + " ") -ForegroundColor Green
+$rubyplugindir = Join-Path -Path $rootdir -ChildPath "source\plugins\ruby"
+Write-Host("copying ruby source files from :" + $rubyplugindir + " to :" + $publishdir + " ...")
+Copy-Item -Path $rubyplugindir -Destination $publishdir -Recurse -Force
+Write-Host("successfully copied ruby source files from :" + $rubyplugindir + " to :" + $publishdir + " ") -ForegroundColor Green
+
+$utilsplugindir = Join-Path -Path $rootdir -ChildPath "source\plugins\utils"
+Write-Host("copying ruby util files from :" + $utilsplugindir + " to :" + $publishdir + " ...")
+Copy-Item -Path $utilsplugindir -Destination $publishdir -Recurse -Force
+Write-Host("successfully copied ruby util files from :" + $utilsplugindir + " to :" + $publishdir + " ") -ForegroundColor Green
+
Set-Location $currentdir
\ No newline at end of file
diff --git a/build/windows/installer/conf/fluent.conf b/build/windows/installer/conf/fluent.conf
index d5eb475ca..741e5ce19 100644
--- a/build/windows/installer/conf/fluent.conf
+++ b/build/windows/installer/conf/fluent.conf
@@ -4,6 +4,13 @@
@log_level info
+<source>
+ @type cadvisor_perf
+ tag oms.api.cadvisorperf
+ run_interval 60
+ @log_level debug
+</source>
+
@type tail
path "#{ENV['AZMON_LOG_TAIL_PATH']}"
@@ -29,6 +36,14 @@
@include fluent-docker-parser.conf
+#custom_metrics_mdm filter plugin
+
+ @type cadvisor2mdm
+ metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
+ log_path /etc/omsagentwindows/filter_cadvisor2mdm.log
+ @log_level info
+
+
@type grep
@@ -46,6 +61,23 @@
+
+ @type mdm
+ @log_level debug
+
+ @type file
+ path /etc/omsagentwindows/out_mdm_cdvisorperf.buffer
+ overflow_action drop_oldest_chunk
+ chunk_limit_size 4m
+ flush_interval 20s
+ retry_max_times 10
+ retry_wait 5s
+ retry_max_interval 5m
+ flush_thread_count 5
+
+ retry_mdm_post_wait_minutes 30
+
+
@type forward
send_timeout 60s
diff --git a/kubernetes/omsagent.yaml b/kubernetes/omsagent.yaml
index ab6bbea9c..4290e1d59 100644
--- a/kubernetes/omsagent.yaml
+++ b/kubernetes/omsagent.yaml
@@ -790,6 +790,9 @@ spec:
fieldPath: status.hostIP
- name: SIDECAR_SCRAPING_ENABLED
value: "true"
+ # Update this with the user assigned msi client id for omsagent
+ - name: USER_ASSIGNED_IDENTITY_CLIENT_ID
+ value: ""
# Add this only for clouds that require cert bootstrapping
- name: REQUIRES_CERT_BOOTSTRAP
value: "true"
@@ -812,6 +815,9 @@ spec:
# - mountPath: C:\ca
# name: ca-certs
# readOnly: true
+ - mountPath: C:\etc\kubernetes\host
+ name: azure-json-path
+ readOnly: true
livenessProbe:
exec:
command:
@@ -843,6 +849,9 @@ spec:
- name: docker-windows-kuberenetes-container-logs
hostPath:
path: C:\var
+ - name: azure-json-path
+ hostPath:
+ path: C:\k
# Need to mount this only for airgapped clouds - Commenting this since it wont exist in non airgapped clouds
#- name: ca-certs
# hostPath:
diff --git a/kubernetes/windows/Dockerfile b/kubernetes/windows/Dockerfile
index fefd089a8..5a5298d0b 100644
--- a/kubernetes/windows/Dockerfile
+++ b/kubernetes/windows/Dockerfile
@@ -20,7 +20,7 @@ RUN refreshenv \
&& gem install cool.io -v 1.5.4 --platform ruby \
&& gem install oj -v 3.3.10 \
&& gem install json -v 2.2.0 \
-&& gem install fluentd -v 1.10.2 \
+&& gem install fluentd -v 1.12.2 \
&& gem install win32-service -v 1.0.1 \
&& gem install win32-ipc -v 0.7.0 \
&& gem install win32-event -v 0.6.3 \
@@ -69,6 +69,10 @@ COPY ./omsagentwindows/installer/conf/telegraf.conf /etc/telegraf/
# copy keepcert alive ruby scripts
COPY ./omsagentwindows/installer/scripts/rubyKeepCertificateAlive/*.rb /etc/fluent/plugin/
+#Copy fluentd ruby plugins
+COPY ./omsagentwindows/ruby/ /etc/fluent/plugin/
+COPY ./omsagentwindows/utils/*.rb /etc/fluent/plugin/
+
ENV AGENT_VERSION ${IMAGE_TAG}
ENV OS_TYPE "windows"
ENV APPLICATIONINSIGHTS_AUTH "NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi"
diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1
index baf95fca4..bc053b0d6 100644
--- a/kubernetes/windows/main.ps1
+++ b/kubernetes/windows/main.ps1
@@ -134,9 +134,6 @@ function Set-EnvironmentVariables {
[System.Environment]::SetEnvironmentVariable("APPLICATIONINSIGHTS_ENDPOINT", $appInsightsEndpoint, "machine")
Write-Host "Successfully set environment variable APPLICATIONINSIGHTS_ENDPOINT - $($appInsightsEndpoint) for target 'machine'..."
}
- else {
- Write-Host "Failed to set environment variable APPLICATIONINSIGHTS_ENDPOINT for target 'machine' since it is either null or empty"
- }
# Check if the instrumentation key needs to be fetched from a storage account (as in airgapped clouds)
$aiKeyURl = [System.Environment]::GetEnvironmentVariable('APPLICATIONINSIGHTS_AUTH_URL')
@@ -180,14 +177,71 @@ function Set-EnvironmentVariables {
[System.Environment]::SetEnvironmentVariable("TELEMETRY_APPLICATIONINSIGHTS_KEY", $aiKeyDecoded, "Process")
[System.Environment]::SetEnvironmentVariable("TELEMETRY_APPLICATIONINSIGHTS_KEY", $aiKeyDecoded, "Machine")
+ # Setting environment variables required by the fluentd plugins
+ $aksResourceId = [System.Environment]::GetEnvironmentVariable("AKS_RESOURCE_ID", "process")
+ if (![string]::IsNullOrEmpty($aksResourceId)) {
+ [System.Environment]::SetEnvironmentVariable("AKS_RESOURCE_ID", $aksResourceId, "machine")
+ Write-Host "Successfully set environment variable AKS_RESOURCE_ID - $($aksResourceId) for target 'machine'..."
+ }
+ else {
+ Write-Host "Failed to set environment variable AKS_RESOURCE_ID for target 'machine' since it is either null or empty"
+ }
+
+ $aksRegion = [System.Environment]::GetEnvironmentVariable("AKS_REGION", "process")
+ if (![string]::IsNullOrEmpty($aksRegion)) {
+ [System.Environment]::SetEnvironmentVariable("AKS_REGION", $aksRegion, "machine")
+ Write-Host "Successfully set environment variable AKS_REGION - $($aksRegion) for target 'machine'..."
+ }
+ else {
+ Write-Host "Failed to set environment variable AKS_REGION for target 'machine' since it is either null or empty"
+ }
+
+ $controllerType = [System.Environment]::GetEnvironmentVariable("CONTROLLER_TYPE", "process")
+ if (![string]::IsNullOrEmpty($controllerType)) {
+ [System.Environment]::SetEnvironmentVariable("CONTROLLER_TYPE", $controllerType, "machine")
+ Write-Host "Successfully set environment variable CONTROLLER_TYPE - $($controllerType) for target 'machine'..."
+ }
+ else {
+ Write-Host "Failed to set environment variable CONTROLLER_TYPE for target 'machine' since it is either null or empty"
+ }
+
+ $osType = [System.Environment]::GetEnvironmentVariable("OS_TYPE", "process")
+ if (![string]::IsNullOrEmpty($osType)) {
+ [System.Environment]::SetEnvironmentVariable("OS_TYPE", $osType, "machine")
+ Write-Host "Successfully set environment variable OS_TYPE - $($osType) for target 'machine'..."
+ }
+ else {
+ Write-Host "Failed to set environment variable OS_TYPE for target 'machine' since it is either null or empty"
+ }
+
+ $userMsi = [System.Environment]::GetEnvironmentVariable("USER_ASSIGNED_IDENTITY_CLIENT_ID", "process")
+ if (![string]::IsNullOrEmpty($userMsi)) {
+ [System.Environment]::SetEnvironmentVariable("USER_ASSIGNED_IDENTITY_CLIENT_ID", $userMsi, "machine")
+ Write-Host "Successfully set environment variable USER_ASSIGNED_IDENTITY_CLIENT_ID - $($userMsi) for target 'machine'..."
+ }
+
+ $hostName = [System.Environment]::GetEnvironmentVariable("HOSTNAME", "process")
+ if (![string]::IsNullOrEmpty($hostName)) {
+ [System.Environment]::SetEnvironmentVariable("HOSTNAME", $hostName, "machine")
+ Write-Host "Successfully set environment variable HOSTNAME - $($hostName) for target 'machine'..."
+ }
+ else {
+ Write-Host "Failed to set environment variable HOSTNAME for target 'machine' since it is either null or empty"
+ }
+
# run config parser
ruby /opt/omsagentwindows/scripts/ruby/tomlparser.rb
.\setenv.ps1
+
+ # run mdm config parser
+ ruby /opt/omsagentwindows/scripts/ruby/tomlparser-mdm-metrics-config.rb
+ .\setmdmenv.ps1
}
function Get-ContainerRuntime {
# default container runtime and make default as containerd when containerd becomes default in AKS
$containerRuntime = "docker"
+ $cAdvisorIsSecure = "false"
$response = ""
$NODE_IP = ""
try {
@@ -227,6 +281,7 @@ function Get-ContainerRuntime {
if (![string]::IsNullOrEmpty($response) -and $response.StatusCode -eq 200) {
Write-Host "API call to https://$($NODE_IP):10250/pods succeeded"
$isPodsAPISuccess = $true
+ $cAdvisorIsSecure = "true"
}
}
catch {
@@ -234,6 +289,11 @@ function Get-ContainerRuntime {
}
}
+ # set IS_SECURE_CADVISOR_PORT env variable for debugging and telemetry purposes
+ Write-Host "Setting IS_SECURE_CADVISOR_PORT environment variable as $($cAdvisorIsSecure)"
+ [System.Environment]::SetEnvironmentVariable("IS_SECURE_CADVISOR_PORT", $cAdvisorIsSecure, "Process")
+ [System.Environment]::SetEnvironmentVariable("IS_SECURE_CADVISOR_PORT", $cAdvisorIsSecure, "Machine")
+
if ($isPodsAPISuccess) {
if (![string]::IsNullOrEmpty($response.Content)) {
$podList = $response.Content | ConvertFrom-Json
diff --git a/source/plugins/ruby/ApplicationInsightsUtility.rb b/source/plugins/ruby/ApplicationInsightsUtility.rb
index 6ae567337..74d08c1e6 100644
--- a/source/plugins/ruby/ApplicationInsightsUtility.rb
+++ b/source/plugins/ruby/ApplicationInsightsUtility.rb
@@ -21,10 +21,15 @@ class ApplicationInsightsUtility
@@EnvApplicationInsightsEndpoint = "APPLICATIONINSIGHTS_ENDPOINT"
@@EnvControllerType = "CONTROLLER_TYPE"
@@EnvContainerRuntime = "CONTAINER_RUNTIME"
-
+ @@isWindows = false
+ @@hostName = (OMS::Common.get_hostname)
+ @@os_type = ENV["OS_TYPE"]
+ if !@@os_type.nil? && !@@os_type.empty? && @@os_type.strip.casecmp("windows") == 0
+ @@isWindows = true
+ @@hostName = ENV["HOSTNAME"]
+ end
@@CustomProperties = {}
@@Tc = nil
- @@hostName = (OMS::Common.get_hostname)
@@proxy = (ProxyUtils.getProxyConfiguration)
def initialize
@@ -133,16 +138,23 @@ def initializeUtility()
end
def getContainerRuntimeInfo()
- containerRuntime = ENV[@@EnvContainerRuntime]
- if !containerRuntime.nil? && !containerRuntime.empty?
- # DockerVersion field holds either containerRuntime for non-docker or Dockerversion if its docker
- @@CustomProperties["DockerVersion"] = containerRuntime
- if containerRuntime.casecmp("docker") == 0
- dockerInfo = DockerApiClient.dockerInfo
- if (!dockerInfo.nil? && !dockerInfo.empty?)
- @@CustomProperties["DockerVersion"] = dockerInfo["Version"]
+ begin
+ # Skip the container runtime lookup on Windows: docker is being deprecated soon and we don't want to bring in the socket dependency.
+ if !@@isWindows.nil? && @@isWindows == false
+ containerRuntime = ENV[@@EnvContainerRuntime]
+ if !containerRuntime.nil? && !containerRuntime.empty?
+ # DockerVersion field holds either containerRuntime for non-docker or Dockerversion if its docker
+ @@CustomProperties["DockerVersion"] = containerRuntime
+ if containerRuntime.casecmp("docker") == 0
+ dockerInfo = DockerApiClient.dockerInfo
+ if (!dockerInfo.nil? && !dockerInfo.empty?)
+ @@CustomProperties["DockerVersion"] = dockerInfo["Version"]
+ end
+ end
end
end
+ rescue => errorStr
+ $log.warn("Exception in AppInsightsUtility: getContainerRuntimeInfo - error: #{errorStr}")
end
end
@@ -262,7 +274,7 @@ def sendMetricTelemetry(metricName, metricValue, properties)
end
def getWorkspaceId()
- begin
+ begin
workspaceId = ENV["WSID"]
if workspaceId.nil? || workspaceId.empty?
$log.warn("Exception in AppInsightsUtility: getWorkspaceId - WorkspaceID either nil or empty")
@@ -274,7 +286,7 @@ def getWorkspaceId()
end
def getWorkspaceCloud()
- begin
+ begin
workspaceDomain = ENV["DOMAIN"]
workspaceCloud = "AzureCloud"
if workspaceDomain.casecmp("opinsights.azure.com") == 0
diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
index f02459aef..10720752d 100644
--- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
+++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb
@@ -38,7 +38,12 @@ class CAdvisorMetricsAPIClient
@npmIntegrationBasic = ENV["TELEMETRY_NPM_INTEGRATION_METRICS_BASIC"]
@npmIntegrationAdvanced = ENV["TELEMETRY_NPM_INTEGRATION_METRICS_ADVANCED"]
- @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt"
+ @os_type = ENV["OS_TYPE"]
+ if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ @LogPath = "/etc/omsagentwindows/kubernetes_perf_log.txt"
+ else
+ @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt"
+ end
@Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M
# @@rxBytesLast = nil
# @@rxBytesTimeLast = nil
@@ -142,39 +147,54 @@ def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601)
operatingSystem = "Linux"
end
if !metricInfo.nil?
- metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", Constants::MEMORY_WORKING_SET_BYTES, metricTime, operatingSystem))
- metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch", metricTime))
-
- if operatingSystem == "Linux"
- metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", Constants::CPU_USAGE_NANO_CORES, metricTime))
- metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", Constants::MEMORY_RSS_BYTES, metricTime, operatingSystem))
- metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", Constants::MEMORY_RSS_BYTES, metricTime))
- elsif operatingSystem == "Windows"
+ # Check whether we are running in the Windows daemonset and, if so, send only the few metrics needed for MDM
+ if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ # Container metrics
+ metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", Constants::MEMORY_WORKING_SET_BYTES, metricTime, operatingSystem))
containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", Constants::CPU_USAGE_NANO_CORES, metricTime)
if containerCpuUsageNanoSecondsRate && !containerCpuUsageNanoSecondsRate.empty? && !containerCpuUsageNanoSecondsRate.nil?
metricDataItems.concat(containerCpuUsageNanoSecondsRate)
end
- end
+ # Node metrics
+ cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", Constants::CPU_USAGE_NANO_CORES, operatingSystem, metricTime)
+ if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil?
+ metricDataItems.push(cpuUsageNanoSecondsRate)
+ end
+ metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", Constants::MEMORY_WORKING_SET_BYTES, metricTime))
+ else
+ metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "workingSetBytes", Constants::MEMORY_WORKING_SET_BYTES, metricTime, operatingSystem))
+ metricDataItems.concat(getContainerStartTimeMetricItems(metricInfo, hostName, "restartTimeEpoch", metricTime))
+
+ if operatingSystem == "Linux"
+ metricDataItems.concat(getContainerCpuMetricItems(metricInfo, hostName, "usageNanoCores", Constants::CPU_USAGE_NANO_CORES, metricTime))
+ metricDataItems.concat(getContainerMemoryMetricItems(metricInfo, hostName, "rssBytes", Constants::MEMORY_RSS_BYTES, metricTime, operatingSystem))
+ metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "rssBytes", Constants::MEMORY_RSS_BYTES, metricTime))
+ elsif operatingSystem == "Windows"
+ containerCpuUsageNanoSecondsRate = getContainerCpuMetricItemRate(metricInfo, hostName, "usageCoreNanoSeconds", Constants::CPU_USAGE_NANO_CORES, metricTime)
+ if containerCpuUsageNanoSecondsRate && !containerCpuUsageNanoSecondsRate.empty? && !containerCpuUsageNanoSecondsRate.nil?
+ metricDataItems.concat(containerCpuUsageNanoSecondsRate)
+ end
+ end
- cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", Constants::CPU_USAGE_NANO_CORES, operatingSystem, metricTime)
- if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil?
- metricDataItems.push(cpuUsageNanoSecondsRate)
+ cpuUsageNanoSecondsRate = getNodeMetricItemRate(metricInfo, hostName, "cpu", "usageCoreNanoSeconds", Constants::CPU_USAGE_NANO_CORES, operatingSystem, metricTime)
+ if cpuUsageNanoSecondsRate && !cpuUsageNanoSecondsRate.empty? && !cpuUsageNanoSecondsRate.nil?
+ metricDataItems.push(cpuUsageNanoSecondsRate)
+ end
+ metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", Constants::MEMORY_WORKING_SET_BYTES, metricTime))
+
+ metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch", metricTime))
+ # Disabling networkRxRate and networkTxRate since we don't use them as of now.
+ #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes"))
+ #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "txBytes", "networkTxBytes"))
+ # networkRxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "rxBytes", "networkRxBytesPerSec")
+ # if networkRxRate && !networkRxRate.empty? && !networkRxRate.nil?
+ # metricDataItems.push(networkRxRate)
+ # end
+ # networkTxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "txBytes", "networkTxBytesPerSec")
+ # if networkTxRate && !networkTxRate.empty? && !networkTxRate.nil?
+ # metricDataItems.push(networkTxRate)
+ # end
end
- metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "memory", "workingSetBytes", Constants::MEMORY_WORKING_SET_BYTES, metricTime))
-
- metricDataItems.push(getNodeLastRebootTimeMetric(metricInfo, hostName, "restartTimeEpoch", metricTime))
-
- # Disabling networkRxRate and networkTxRate since we dont use it as of now.
- #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "rxBytes", "networkRxBytes"))
- #metricDataItems.push(getNodeMetricItem(metricInfo, hostName, "network", "txBytes", "networkTxBytes"))
- # networkRxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "rxBytes", "networkRxBytesPerSec")
- # if networkRxRate && !networkRxRate.empty? && !networkRxRate.nil?
- # metricDataItems.push(networkRxRate)
- # end
- # networkTxRate = getNodeMetricItemRate(metricInfo, hostName, "network", "txBytes", "networkTxBytesPerSec")
- # if networkTxRate && !networkTxRate.empty? && !networkTxRate.nil?
- # metricDataItems.push(networkTxRate)
- # end
else
@Log.warn("Couldn't get metric information for host: #{hostName}")
end
@@ -203,7 +223,6 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
containerName = container["name"]
metricValue = container["cpu"][cpuMetricNameToCollect]
metricTime = metricPollTime #container["cpu"]["time"]
-
metricItem = {}
metricItem["Timestamp"] = metricTime
diff --git a/source/plugins/ruby/KubernetesApiClient.rb b/source/plugins/ruby/KubernetesApiClient.rb
index 3720bf6dc..4b50e20d8 100644
--- a/source/plugins/ruby/KubernetesApiClient.rb
+++ b/source/plugins/ruby/KubernetesApiClient.rb
@@ -25,7 +25,12 @@ class KubernetesApiClient
#@@IsValidRunningNode = nil
#@@IsLinuxCluster = nil
@@KubeSystemNamespace = "kube-system"
- @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt"
+ @os_type = ENV["OS_TYPE"]
+ if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ @LogPath = "/etc/omsagentwindows/kubernetes_client_log.txt"
+ else
+ @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_client_log.txt"
+ end
@Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M
@@TokenFileName = "/var/run/secrets/kubernetes.io/serviceaccount/token"
@@TokenStr = nil
diff --git a/source/plugins/ruby/MdmMetricsGenerator.rb b/source/plugins/ruby/MdmMetricsGenerator.rb
index a809087dc..73cf19fac 100644
--- a/source/plugins/ruby/MdmMetricsGenerator.rb
+++ b/source/plugins/ruby/MdmMetricsGenerator.rb
@@ -10,7 +10,12 @@ class MdmMetricsGenerator
require_relative "constants"
require_relative "oms_common"
- @log_path = "/var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log"
+ @os_type = ENV["OS_TYPE"]
+ if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ @log_path = "/etc/omsagentwindows/mdm_metrics_generator.log"
+ else
+ @log_path = "/var/opt/microsoft/docker-cimprov/log/mdm_metrics_generator.log"
+ end
@log = Logger.new(@log_path, 1, 5000000)
@@hostName = (OMS::Common.get_hostname)
diff --git a/source/plugins/ruby/arc_k8s_cluster_identity.rb b/source/plugins/ruby/arc_k8s_cluster_identity.rb
index 552dafb1f..39b8c1c96 100644
--- a/source/plugins/ruby/arc_k8s_cluster_identity.rb
+++ b/source/plugins/ruby/arc_k8s_cluster_identity.rb
@@ -18,15 +18,20 @@ class ArcK8sClusterIdentity
@@crd_resource_uri_template = "%{kube_api_server_url}/apis/%{cluster_config_crd_api_version}/namespaces/%{cluster_identity_resource_namespace}/azureclusteridentityrequests/%{cluster_identity_resource_name}"
@@secret_resource_uri_template = "%{kube_api_server_url}/api/v1/namespaces/%{cluster_identity_token_secret_namespace}/secrets/%{token_secret_name}"
@@azure_monitor_custom_metrics_audience = "https://monitoring.azure.com/"
- @@cluster_identity_request_kind = "AzureClusterIdentityRequest"
+ @@cluster_identity_request_kind = "AzureClusterIdentityRequest"
def initialize
- @LogPath = "/var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log"
+ @os_type = ENV["OS_TYPE"]
+ if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ @LogPath = "/etc/omsagentwindows/arc_k8s_cluster_identity.log"
+ else
+ @LogPath = "/var/opt/microsoft/docker-cimprov/log/arc_k8s_cluster_identity.log"
+ end
@log = Logger.new(@LogPath, 1, 5000000)
@log.info "initialize start @ #{Time.now.utc.iso8601}"
@token_expiry_time = Time.now
@cached_access_token = String.new
- @isLastTokenRenewalUpdatePending = false
+ @isLastTokenRenewalUpdatePending = false
@token_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
@cert_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
@kube_api_server_url = KubernetesApiClient.getKubeAPIServerUrl
@@ -34,8 +39,8 @@ def initialize
@log.warn "got api server url nil from KubernetesApiClient.getKubeAPIServerUrl @ #{Time.now.utc.iso8601}"
end
@http_client = get_http_client
- @service_account_token = get_service_account_token
- @extensionName = ENV["ARC_K8S_EXTENSION_NAME"]
+ @service_account_token = get_service_account_token
+ @extensionName = ENV["ARC_K8S_EXTENSION_NAME"]
@log.info "extension name:#{@extensionName} @ #{Time.now.utc.iso8601}"
@log.info "initialize complete @ #{Time.now.utc.iso8601}"
end
@@ -55,7 +60,7 @@ def get_cluster_identity_token()
@isLastTokenRenewalUpdatePending = true
else
@log.warn "last token renewal update still pending @ #{Time.now.utc.iso8601}"
- end
+ end
end
@log.info "get token reference from crd @ #{Time.now.utc.iso8601}"
tokenReference = get_token_reference_from_crd
@@ -68,7 +73,7 @@ def get_cluster_identity_token()
token = get_token_from_secret(token_secret_name, token_secret_data_name)
if !token.nil?
@cached_access_token = token
- @isLastTokenRenewalUpdatePending = false
+ @isLastTokenRenewalUpdatePending = false
else
@log.warn "got token nil from secret: #{@token_secret_name}"
end
@@ -141,7 +146,7 @@ def get_token_reference_from_crd()
create_request.body = crd_request_body_json
create_response = @http_client.request(create_request)
@log.info "Got response of #{create_response.code} for POST #{crd_request_uri} @ #{Time.now.utc.iso8601}"
- end
+ end
rescue => err
@log.warn "get_token_reference_from_crd call failed: #{err}"
ApplicationInsightsUtility.sendExceptionTelemetry(err, { "FeatureArea" => "MDM" })
@@ -159,7 +164,7 @@ def renew_near_expiry_token()
cluster_identity_resource_namespace: @@cluster_identity_resource_namespace,
cluster_identity_resource_name: @@cluster_identity_resource_name,
}
- update_crd_request_body = { 'status': {'expirationTime': ''} }
+ update_crd_request_body = { 'status': { 'expirationTime': "" } }
update_crd_request_body_json = update_crd_request_body.to_json
update_crd_request_uri = crd_request_uri + "/status"
update_request = Net::HTTP::Patch.new(update_crd_request_uri)
@@ -234,9 +239,9 @@ def get_crd_request_body
body["metadata"]["namespace"] = @@cluster_identity_resource_namespace
body["spec"] = {}
body["spec"]["audience"] = @@azure_monitor_custom_metrics_audience
- if !@extensionName.nil? && !@extensionName.empty?
- body["spec"]["resourceId"] = @extensionName
- end
+ if !@extensionName.nil? && !@extensionName.empty?
+ body["spec"]["resourceId"] = @extensionName
+ end
return body
end
end
diff --git a/source/plugins/ruby/filter_cadvisor2mdm.rb b/source/plugins/ruby/filter_cadvisor2mdm.rb
index 62dcf31dc..9c6b661b0 100644
--- a/source/plugins/ruby/filter_cadvisor2mdm.rb
+++ b/source/plugins/ruby/filter_cadvisor2mdm.rb
@@ -2,7 +2,7 @@
# frozen_string_literal: true
-require 'fluent/plugin/filter'
+require "fluent/plugin/filter"
module Fluent::Plugin
require "logger"
@@ -28,6 +28,12 @@ class CAdvisor2MdmFilter < Filter
@@metric_threshold_hash = {}
@@controller_type = ""
+ @@isWindows = false
+ @@os_type = ENV["OS_TYPE"]
+ if !@@os_type.nil? && !@@os_type.empty? && @@os_type.strip.casecmp("windows") == 0
+ @@isWindows = true
+ end
+
def initialize
super
end
@@ -130,15 +136,17 @@ def flushMetricTelemetry
# Also send for PV usage metrics
begin
- pvTimeDifference = (DateTime.now.to_time.to_i - @@pvUsageTelemetryTimeTracker).abs
- pvTimeDifferenceInMinutes = pvTimeDifference / 60
- if (pvTimeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
- pvProperties = {}
- pvProperties["PVUsageThresholdPercentage"] = @@metric_threshold_hash[Constants::PV_USED_BYTES]
- pvProperties["PVUsageThresholdExceededInLastFlushInterval"] = @pvExceededUsageThreshold
- ApplicationInsightsUtility.sendCustomEvent(Constants::PV_USAGE_HEART_BEAT_EVENT, pvProperties)
- @pvExceededUsageThreshold = false
- @@pvUsageTelemetryTimeTracker = DateTime.now.to_time.to_i
+ if !@@isWindows.nil? && @@isWindows == false
+ pvTimeDifference = (DateTime.now.to_time.to_i - @@pvUsageTelemetryTimeTracker).abs
+ pvTimeDifferenceInMinutes = pvTimeDifference / 60
+ if (pvTimeDifferenceInMinutes >= Constants::TELEMETRY_FLUSH_INTERVAL_IN_MINUTES)
+ pvProperties = {}
+ pvProperties["PVUsageThresholdPercentage"] = @@metric_threshold_hash[Constants::PV_USED_BYTES]
+ pvProperties["PVUsageThresholdExceededInLastFlushInterval"] = @pvExceededUsageThreshold
+ ApplicationInsightsUtility.sendCustomEvent(Constants::PV_USAGE_HEART_BEAT_EVENT, pvProperties)
+ @pvExceededUsageThreshold = false
+ @@pvUsageTelemetryTimeTracker = DateTime.now.to_time.to_i
+ end
end
rescue => errorStr
@log.info "Error in flushMetricTelemetry: #{errorStr} for PV usage telemetry"
@@ -346,7 +354,6 @@ def ensure_cpu_memory_capacity_set
# cpu_capacity and memory_capacity keep initialized value of 0.0
@log.error "Error getting capacity_from_kubelet: cpu_capacity and memory_capacity"
end
-
end
end
diff --git a/source/plugins/ruby/in_cadvisor_perf.rb b/source/plugins/ruby/in_cadvisor_perf.rb
index 781042cea..b3f9bd08b 100644
--- a/source/plugins/ruby/in_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_cadvisor_perf.rb
@@ -1,16 +1,20 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-require 'fluent/plugin/input'
+require "fluent/plugin/input"
module Fluent::Plugin
-
class CAdvisor_Perf_Input < Input
Fluent::Plugin.register_input("cadvisor_perf", self)
+ @@isWindows = false
+ @@os_type = ENV["OS_TYPE"]
+ if !@@os_type.nil? && !@@os_type.empty? && @@os_type.strip.casecmp("windows") == 0
+ @@isWindows = true
+ end
def initialize
super
require "yaml"
- require 'yajl/json_gem'
+ require "yajl/json_gem"
require "time"
require_relative "CAdvisorMetricsAPIClient"
@@ -69,31 +73,32 @@ def enumerate()
router.emit_stream(@containerhealthtag, eventStream) if eventStream
router.emit_stream(@nodehealthtag, eventStream) if eventStream
-
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
$log.info("cAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}")
end
#start GPU InsightsMetrics items
begin
- containerGPUusageInsightsMetricsDataItems = []
- containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime))
+ if !@@isWindows.nil? && @@isWindows == false
+ containerGPUusageInsightsMetricsDataItems = []
+ containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: nil, metricTime: batchTime))
containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord|
insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
end
- router.emit_stream(@insightsmetricstag, insightsMetricsEventStream) if insightsMetricsEventStream
- router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
-
- if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
- $log.info("cAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+ router.emit_stream(@insightsmetricstag, insightsMetricsEventStream) if insightsMetricsEventStream
+ router.emit_stream(@mdmtag, insightsMetricsEventStream) if insightsMetricsEventStream
+
+ if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && insightsMetricsEventStream.count > 0)
+ $log.info("cAdvisorInsightsMetricsEmitStreamSuccess @ #{Time.now.utc.iso8601}")
+ end
end
rescue => errorStr
$log.warn "Failed when processing GPU Usage metrics in_cadvisor_perf : #{errorStr}"
$log.debug_backtrace(errorStr.backtrace)
ApplicationInsightsUtility.sendExceptionTelemetry(errorStr)
- end
+ end
#end GPU InsightsMetrics items
rescue => errorStr
diff --git a/source/plugins/ruby/in_win_cadvisor_perf.rb b/source/plugins/ruby/in_win_cadvisor_perf.rb
index 61e823ea6..9ab2474b1 100644
--- a/source/plugins/ruby/in_win_cadvisor_perf.rb
+++ b/source/plugins/ruby/in_win_cadvisor_perf.rb
@@ -1,7 +1,7 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true
-require 'fluent/plugin/input'
+require "fluent/plugin/input"
module Fluent::Plugin
class Win_CAdvisor_Perf_Input < Input
@@ -20,7 +20,7 @@ def initialize
require_relative "oms_common"
require_relative "omslog"
require_relative "constants"
- @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
+ @insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
end
config_param :run_interval, :time, :default => 60
@@ -57,7 +57,7 @@ def enumerate()
begin
timeDifference = (DateTime.now.to_time.to_i - @@winNodeQueryTimeTracker).abs
timeDifferenceInMinutes = timeDifference / 60
- @@istestvar = ENV["ISTEST"]
+ @@istestvar = ENV["ISTEST"]
#Resetting this cache so that it is populated with the current set of containers with every call
CAdvisorMetricsAPIClient.resetWinContainerIdCache()
@@ -79,7 +79,6 @@ def enumerate()
end
end
router.emit_stream(@tag, eventStream) if eventStream
- router.emit_stream(@mdmtag, eventStream) if eventStream
if (!@@istestvar.nil? && !@@istestvar.empty? && @@istestvar.casecmp("true") == 0 && eventStream.count > 0)
$log.info("winCAdvisorPerfEmitStreamSuccess @ #{Time.now.utc.iso8601}")
@@ -89,10 +88,10 @@ def enumerate()
begin
containerGPUusageInsightsMetricsDataItems = []
containerGPUusageInsightsMetricsDataItems.concat(CAdvisorMetricsAPIClient.getInsightsMetrics(winNode: winNode, metricTime: Time.now.utc.iso8601))
- insightsMetricsEventStream = Fluent::MultiEventStream.new
+ insightsMetricsEventStream = Fluent::MultiEventStream.new
containerGPUusageInsightsMetricsDataItems.each do |insightsMetricsRecord|
- insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
+ insightsMetricsEventStream.add(time, insightsMetricsRecord) if insightsMetricsRecord
end
router.emit_stream(@insightsMetricsTag, insightsMetricsEventStream) if insightsMetricsEventStream
diff --git a/source/plugins/ruby/kubelet_utils.rb b/source/plugins/ruby/kubelet_utils.rb
index e2c731b79..22bc87c0e 100644
--- a/source/plugins/ruby/kubelet_utils.rb
+++ b/source/plugins/ruby/kubelet_utils.rb
@@ -9,7 +9,12 @@
require "bigdecimal"
class KubeletUtils
- @log_path = "/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log"
+ @os_type = ENV["OS_TYPE"]
+ if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
+ @log_path = "/etc/omsagentwindows/filter_cadvisor2mdm.log"
+ else
+ @log_path = "/var/opt/microsoft/docker-cimprov/log/filter_cadvisor2mdm.log"
+ end
@log = Logger.new(@log_path, 1, 5000000)
class << self