Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
df13606
full switch to mdsd, upgrade to ruby v1 & omsagent removal
ganga1980 May 14, 2021
9edf933
add odsdirect as fallback option
ganga1980 May 14, 2021
bbecc2c
cleanup
ganga1980 May 14, 2021
836331a
cleanup
ganga1980 May 14, 2021
0178310
move customRegion to stage3
ganga1980 May 14, 2021
f29ec25
updates related to containerlog route
ganga1980 May 15, 2021
f3fbf3c
make xml eventschema consistent
ganga1980 May 15, 2021
673807f
add buffer settings
ganga1980 May 17, 2021
877ae9e
address HTTPServerException deprecation in ruby 2.6
ganga1980 May 17, 2021
90e9eeb
update to official mdsd version
ganga1980 May 17, 2021
5f4707f
merge with ganga's branch - gangams/aad-stage2-full-switch-to-mdsd
rashmichandrashekar May 17, 2021
7ecdbab
fix log message issue
ganga1980 May 17, 2021
90240af
fixes
rashmichandrashekar May 19, 2021
5cd47d7
fix pr feedback
ganga1980 May 19, 2021
a4dff09
get rid of unused code from omscommon
ganga1980 May 19, 2021
2c8afa4
fix pr feedback
ganga1980 May 19, 2021
7b94989
fix pr feedback
ganga1980 May 19, 2021
1e148d1
merge ci_dev latest changes
ganga1980 May 19, 2021
b2fb9d8
clean up
ganga1980 May 19, 2021
a1c7658
clean up
ganga1980 May 19, 2021
df4898a
Merge branch 'ci_dev' into gangams/aad-stage2-full-switch-to-mdsd
ganga1980 May 19, 2021
d1de167
fix missing conf
ganga1980 May 20, 2021
8d5e61d
ai auth env
rashmichandrashekar May 21, 2021
0c3cf8c
merging with Ganga's latest changes
rashmichandrashekar May 21, 2021
1e0795e
remove error
rashmichandrashekar May 24, 2021
5c56058
merging with ci_dev
rashmichandrashekar May 25, 2021
553465b
remove mdm metric emit
rashmichandrashekar May 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#!/usr/local/bin/ruby
# frozen_string_literal: true

require_relative "tomlrb"
require_relative "ConfigParseErrorLogger"
# tomlrb must be loaded with require_relative on Linux and with require on Windows, since it is installed as a gem on Windows
@os_type = ENV["OS_TYPE"]
if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
require "tomlrb"
else
require_relative "tomlrb"
end

require_relative "/etc/fluent/plugin/constants"
require_relative "ConfigParseErrorLogger"

@configMapMountPath = "/etc/config/settings/alertable-metrics-configuration-settings"
@configVersion = ""
Expand Down Expand Up @@ -124,6 +131,10 @@ def populateSettingValuesFromConfigMap(parsedConfig)
end
end

# Builds the PowerShell text that sets one environment variable for both the
# "Process" and "Machine" targets.
#
# env_variable_name  - name interpolated as the first SetEnvironmentVariable argument.
# env_variable_value - value interpolated (via to_s) as the second argument.
#
# Returns a String with two newline-terminated SetEnvironmentVariable calls,
# "Process" first and "Machine" second.
def get_command_windows(env_variable_name, env_variable_value)
  %w[Process Machine].map do |target|
    "[System.Environment]::SetEnvironmentVariable(\"#{env_variable_name}\", \"#{env_variable_value}\", \"#{target}\")\n"
  end.join
end

@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
puts "****************Start MDM Metrics Config Processing********************"
if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it
Expand All @@ -137,19 +148,37 @@ def populateSettingValuesFromConfigMap(parsedConfig)
end
end

# Write the settings to file, so that they can be set as environment variables
file = File.open("config_mdm_metrics_env_var", "w")
if !@os_type.nil? && !@os_type.empty? && @os_type.strip.casecmp("windows") == 0
# Write the settings to file, so that they can be set as environment variables in windows container
file = File.open("setmdmenv.ps1", "w")

if !file.nil?
file.write("export AZMON_ALERT_CONTAINER_CPU_THRESHOLD=#{@percentageCpuUsageThreshold}\n")
file.write("export AZMON_ALERT_CONTAINER_MEMORY_RSS_THRESHOLD=#{@percentageMemoryRssThreshold}\n")
file.write("export AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD=\"#{@percentageMemoryWorkingSetThreshold}\"\n")
file.write("export AZMON_ALERT_PV_USAGE_THRESHOLD=#{@percentagePVUsageThreshold}\n")
file.write("export AZMON_ALERT_JOB_COMPLETION_TIME_THRESHOLD=#{@jobCompletionThresholdMinutes}\n")
# Close file after writing all MDM setting environment variables
file.close
puts "****************End MDM Metrics Config Processing********************"
if !file.nil?
commands = get_command_windows("AZMON_ALERT_CONTAINER_CPU_THRESHOLD", @percentageCpuUsageThreshold)
file.write(commands)
commands = get_command_windows("AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD", @percentageMemoryWorkingSetThreshold)
file.write(commands)
# Close file after writing all environment variables
file.close
puts "****************End MDM Metrics Config Processing********************"
else
puts "Exception while opening file for writing MDM metric config environment variables"
puts "****************End MDM Metrics Config Processing********************"
end
else
puts "Exception while opening file for writing MDM metric config environment variables"
puts "****************End MDM Metrics Config Processing********************"
# Write the settings to file, so that they can be set as environment variables in linux container
file = File.open("config_mdm_metrics_env_var", "w")

if !file.nil?
file.write("export AZMON_ALERT_CONTAINER_CPU_THRESHOLD=#{@percentageCpuUsageThreshold}\n")
file.write("export AZMON_ALERT_CONTAINER_MEMORY_RSS_THRESHOLD=#{@percentageMemoryRssThreshold}\n")
file.write("export AZMON_ALERT_CONTAINER_MEMORY_WORKING_SET_THRESHOLD=\"#{@percentageMemoryWorkingSetThreshold}\"\n")
file.write("export AZMON_ALERT_PV_USAGE_THRESHOLD=#{@percentagePVUsageThreshold}\n")
file.write("export AZMON_ALERT_JOB_COMPLETION_TIME_THRESHOLD=#{@jobCompletionThresholdMinutes}\n")
# Close file after writing all MDM setting environment variables
file.close
puts "****************End MDM Metrics Config Processing********************"
else
puts "Exception while opening file for writing MDM metric config environment variables"
puts "****************End MDM Metrics Config Processing********************"
end
end
2 changes: 1 addition & 1 deletion build/linux/installer/datafiles/base_container.data
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ MAINTAINER: 'Microsoft Corporation'
/opt/microsoft/docker-cimprov/bin/TelegrafTCPErrorTelemetry.sh; build/linux/installer/scripts/TelegrafTCPErrorTelemetry.sh; 755; root; root
/opt/livenessprobe.sh; build/linux/installer/scripts/livenessprobe.sh; 755; root; root
/opt/tomlparser-prom-customconfig.rb; build/common/installer/scripts/tomlparser-prom-customconfig.rb; 755; root; root
/opt/tomlparser-mdm-metrics-config.rb; build/linux/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root
/opt/tomlparser-mdm-metrics-config.rb; build/common/installer/scripts/tomlparser-mdm-metrics-config.rb; 755; root; root
/opt/tomlparser-metric-collection-config.rb; build/linux/installer/scripts/tomlparser-metric-collection-config.rb; 755; root; root


Expand Down
10 changes: 10 additions & 0 deletions build/windows/Makefile.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,14 @@ $exclude = @('*.cs','*.csproj')
Copy-Item -Path $installerdir -Destination $publishdir -Recurse -Force -Exclude $exclude
Write-Host("successfully copied installer files conf and scripts from :" + $installerdir + " to :" + $publishdir + " ") -ForegroundColor Green

$rubyplugindir = Join-Path -Path $rootdir -ChildPath "source\plugins\ruby"
Write-Host("copying ruby source files from :" + $rubyplugindir + " to :" + $publishdir + " ...")
Copy-Item -Path $rubyplugindir -Destination $publishdir -Recurse -Force
Write-Host("successfully copied ruby source files from :" + $rubyplugindir + " to :" + $publishdir + " ") -ForegroundColor Green

$utilsplugindir = Join-Path -Path $rootdir -ChildPath "source\plugins\utils"
Write-Host("copying ruby util files from :" + $utilsplugindir + " to :" + $publishdir + " ...")
Copy-Item -Path $utilsplugindir -Destination $publishdir -Recurse -Force
Write-Host("successfully copied ruby util files from :" + $utilsplugindir + " to :" + $publishdir + " ") -ForegroundColor Green

Set-Location $currentdir
32 changes: 32 additions & 0 deletions build/windows/installer/conf/fluent.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
@log_level info
</source>

<source>
@type cadvisor_perf
tag oms.api.cadvisorperf
run_interval 60
@log_level debug
</source>

<source>
@type tail
path "#{ENV['AZMON_LOG_TAIL_PATH']}"
Expand All @@ -29,6 +36,14 @@
@include fluent-docker-parser.conf
</source>

#custom_metrics_mdm filter plugin
<filter mdm.cadvisorperf**>
@type cadvisor2mdm
metrics_to_collect cpuUsageNanoCores,memoryWorkingSetBytes
log_path /etc/omsagentwindows/filter_cadvisor2mdm.log
@log_level info
</filter>

<filter oms.container.log.la>
@type grep
<exclude>
Expand All @@ -46,6 +61,23 @@
</record>
</filter>

<match mdm.cadvisorperf**>
@type mdm
@log_level debug
<buffer>
@type file
path /etc/omsagentwindows/out_mdm_cdvisorperf.buffer
overflow_action drop_oldest_chunk
chunk_limit_size 4m
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should add queue_limit_length 20, similar to all other plugins. This will help us buffer up to 80 MB in case of transient failures.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have reused the configuration we have in Linux. Is there any reason why this was not in Linux and should be included in Windows?

flush_interval 20s
retry_max_times 10
retry_wait 5s
retry_max_interval 5m
flush_thread_count 5
</buffer>
retry_mdm_post_wait_minutes 30
</match>

<match oms.container.**>
@type forward
send_timeout 60s
Expand Down
9 changes: 9 additions & 0 deletions kubernetes/omsagent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,9 @@ spec:
fieldPath: status.hostIP
- name: SIDECAR_SCRAPING_ENABLED
value: "true"
# Update this with the user assigned msi client id for omsagent
- name: USER_ASSIGNED_IDENTITY_CLIENT_ID
value: ""
# Add this only for clouds that require cert bootstrapping
- name: REQUIRES_CERT_BOOTSTRAP
value: "true"
Expand All @@ -812,6 +815,9 @@ spec:
# - mountPath: C:\ca
# name: ca-certs
# readOnly: true
- mountPath: C:\etc\kubernetes\host
name: azure-json-path
readOnly: true
livenessProbe:
exec:
command:
Expand Down Expand Up @@ -843,6 +849,9 @@ spec:
- name: docker-windows-kuberenetes-container-logs
hostPath:
path: C:\var
- name: azure-json-path
hostPath:
path: C:\k
# Need to mount this only for airgapped clouds - Commenting this since it wont exist in non airgapped clouds
#- name: ca-certs
# hostPath:
Expand Down
6 changes: 5 additions & 1 deletion kubernetes/windows/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN refreshenv \
&& gem install cool.io -v 1.5.4 --platform ruby \
&& gem install oj -v 3.3.10 \
&& gem install json -v 2.2.0 \
&& gem install fluentd -v 1.10.2 \
&& gem install fluentd -v 1.12.2 \
&& gem install win32-service -v 1.0.1 \
&& gem install win32-ipc -v 0.7.0 \
&& gem install win32-event -v 0.6.3 \
Expand Down Expand Up @@ -69,6 +69,10 @@ COPY ./omsagentwindows/installer/conf/telegraf.conf /etc/telegraf/
# copy keepcert alive ruby scripts
COPY ./omsagentwindows/installer/scripts/rubyKeepCertificateAlive/*.rb /etc/fluent/plugin/

#Copy fluentd ruby plugins
COPY ./omsagentwindows/ruby/ /etc/fluent/plugin/
COPY ./omsagentwindows/utils/*.rb /etc/fluent/plugin/

ENV AGENT_VERSION ${IMAGE_TAG}
ENV OS_TYPE "windows"
ENV APPLICATIONINSIGHTS_AUTH "NzAwZGM5OGYtYTdhZC00NThkLWI5NWMtMjA3ZjM3NmM3YmRi"
Expand Down
66 changes: 63 additions & 3 deletions kubernetes/windows/main.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,6 @@ function Set-EnvironmentVariables {
[System.Environment]::SetEnvironmentVariable("APPLICATIONINSIGHTS_ENDPOINT", $appInsightsEndpoint, "machine")
Write-Host "Successfully set environment variable APPLICATIONINSIGHTS_ENDPOINT - $($appInsightsEndpoint) for target 'machine'..."
}
else {
Write-Host "Failed to set environment variable APPLICATIONINSIGHTS_ENDPOINT for target 'machine' since it is either null or empty"
}

# Check if the instrumentation key needs to be fetched from a storage account (as in airgapped clouds)
$aiKeyURl = [System.Environment]::GetEnvironmentVariable('APPLICATIONINSIGHTS_AUTH_URL')
Expand Down Expand Up @@ -180,14 +177,71 @@ function Set-EnvironmentVariables {
[System.Environment]::SetEnvironmentVariable("TELEMETRY_APPLICATIONINSIGHTS_KEY", $aiKeyDecoded, "Process")
[System.Environment]::SetEnvironmentVariable("TELEMETRY_APPLICATIONINSIGHTS_KEY", $aiKeyDecoded, "Machine")

# Setting environment variables required by the fluentd plugins
$aksResourceId = [System.Environment]::GetEnvironmentVariable("AKS_RESOURCE_ID", "process")
if (![string]::IsNullOrEmpty($aksResourceId)) {
[System.Environment]::SetEnvironmentVariable("AKS_RESOURCE_ID", $aksResourceId, "machine")
Write-Host "Successfully set environment variable AKS_RESOURCE_ID - $($aksResourceId) for target 'machine'..."
}
else {
Write-Host "Failed to set environment variable AKS_RESOURCE_ID for target 'machine' since it is either null or empty"
}

$aksRegion = [System.Environment]::GetEnvironmentVariable("AKS_REGION", "process")
if (![string]::IsNullOrEmpty($aksRegion)) {
[System.Environment]::SetEnvironmentVariable("AKS_REGION", $aksRegion, "machine")
Write-Host "Successfully set environment variable AKS_REGION - $($aksRegion) for target 'machine'..."
}
else {
Write-Host "Failed to set environment variable AKS_REGION for target 'machine' since it is either null or empty"
}

$controllerType = [System.Environment]::GetEnvironmentVariable("CONTROLLER_TYPE", "process")
if (![string]::IsNullOrEmpty($controllerType)) {
[System.Environment]::SetEnvironmentVariable("CONTROLLER_TYPE", $controllerType, "machine")
Write-Host "Successfully set environment variable CONTROLLER_TYPE - $($controllerType) for target 'machine'..."
}
else {
Write-Host "Failed to set environment variable CONTROLLER_TYPE for target 'machine' since it is either null or empty"
}

$osType = [System.Environment]::GetEnvironmentVariable("OS_TYPE", "process")
if (![string]::IsNullOrEmpty($osType)) {
[System.Environment]::SetEnvironmentVariable("OS_TYPE", $osType, "machine")
Write-Host "Successfully set environment variable OS_TYPE - $($osType) for target 'machine'..."
}
else {
Write-Host "Failed to set environment variable OS_TYPE for target 'machine' since it is either null or empty"
}

$userMsi = [System.Environment]::GetEnvironmentVariable("USER_ASSIGNED_IDENTITY_CLIENT_ID", "process")
if (![string]::IsNullOrEmpty($userMsi)) {
[System.Environment]::SetEnvironmentVariable("USER_ASSIGNED_IDENTITY_CLIENT_ID", $userMsi, "machine")
Write-Host "Successfully set environment variable USER_ASSIGNED_IDENTITY_CLIENT_ID - $($userMsi) for target 'machine'..."
}

$hostName = [System.Environment]::GetEnvironmentVariable("HOSTNAME", "process")
if (![string]::IsNullOrEmpty($hostName)) {
[System.Environment]::SetEnvironmentVariable("HOSTNAME", $hostName, "machine")
Write-Host "Successfully set environment variable HOSTNAME - $($hostName) for target 'machine'..."
}
else {
Write-Host "Failed to set environment variable HOSTNAME for target 'machine' since it is either null or empty"
}

# run config parser
ruby /opt/omsagentwindows/scripts/ruby/tomlparser.rb
.\setenv.ps1

# run mdm config parser
ruby /opt/omsagentwindows/scripts/ruby/tomlparser-mdm-metrics-config.rb
.\setmdmenv.ps1
}

function Get-ContainerRuntime {
# Default container runtime is docker; change the default to containerd once containerd becomes the default in AKS
$containerRuntime = "docker"
$cAdvisorIsSecure = "false"
$response = ""
$NODE_IP = ""
try {
Expand Down Expand Up @@ -227,13 +281,19 @@ function Get-ContainerRuntime {
if (![string]::IsNullOrEmpty($response) -and $response.StatusCode -eq 200) {
Write-Host "API call to https://$($NODE_IP):10250/pods succeeded"
$isPodsAPISuccess = $true
$cAdvisorIsSecure = "true"
}
}
catch {
Write-Host "API call to https://$($NODE_IP):10250/pods failed"
}
}

# set IS_SECURE_CADVISOR_PORT env for debug and telemetry purpose
Write-Host "Setting IS_SECURE_CADVISOR_PORT environment variable as $($cAdvisorIsSecure)"
[System.Environment]::SetEnvironmentVariable("IS_SECURE_CADVISOR_PORT", $cAdvisorIsSecure, "Process")
[System.Environment]::SetEnvironmentVariable("IS_SECURE_CADVISOR_PORT", $cAdvisorIsSecure, "Machine")

if ($isPodsAPISuccess) {
if (![string]::IsNullOrEmpty($response.Content)) {
$podList = $response.Content | ConvertFrom-Json
Expand Down
36 changes: 24 additions & 12 deletions source/plugins/ruby/ApplicationInsightsUtility.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@ class ApplicationInsightsUtility
@@EnvApplicationInsightsEndpoint = "APPLICATIONINSIGHTS_ENDPOINT"
@@EnvControllerType = "CONTROLLER_TYPE"
@@EnvContainerRuntime = "CONTAINER_RUNTIME"

@@isWindows = false
@@hostName = (OMS::Common.get_hostname)
@@os_type = ENV["OS_TYPE"]
if !@@os_type.nil? && !@@os_type.empty? && @@os_type.strip.casecmp("windows") == 0
@@isWindows = true
@@hostName = ENV["HOSTNAME"]
end
@@CustomProperties = {}
@@Tc = nil
@@hostName = (OMS::Common.get_hostname)
@@proxy = (ProxyUtils.getProxyConfiguration)

def initialize
Expand Down Expand Up @@ -133,16 +138,23 @@ def initializeUtility()
end

def getContainerRuntimeInfo()
containerRuntime = ENV[@@EnvContainerRuntime]
if !containerRuntime.nil? && !containerRuntime.empty?
# DockerVersion field holds either containerRuntime for non-docker or Dockerversion if its docker
@@CustomProperties["DockerVersion"] = containerRuntime
if containerRuntime.casecmp("docker") == 0
dockerInfo = DockerApiClient.dockerInfo
if (!dockerInfo.nil? && !dockerInfo.empty?)
@@CustomProperties["DockerVersion"] = dockerInfo["Version"]
begin
# Not doing this for windows since docker is being deprecated soon and we dont want to bring in the socket dependency.
if !@@isWindows.nil? && @@isWindows == false
containerRuntime = ENV[@@EnvContainerRuntime]
if !containerRuntime.nil? && !containerRuntime.empty?
# DockerVersion field holds either containerRuntime for non-docker or Dockerversion if its docker
@@CustomProperties["DockerVersion"] = containerRuntime
if containerRuntime.casecmp("docker") == 0
dockerInfo = DockerApiClient.dockerInfo
if (!dockerInfo.nil? && !dockerInfo.empty?)
@@CustomProperties["DockerVersion"] = dockerInfo["Version"]
end
end
end
end
rescue => errorStr
$log.warn("Exception in AppInsightsUtility: getContainerRuntimeInfo - error: #{errorStr}")
end
end

Expand Down Expand Up @@ -262,7 +274,7 @@ def sendMetricTelemetry(metricName, metricValue, properties)
end

def getWorkspaceId()
begin
begin
workspaceId = ENV["WSID"]
if workspaceId.nil? || workspaceId.empty?
$log.warn("Exception in AppInsightsUtility: getWorkspaceId - WorkspaceID either nil or empty")
Expand All @@ -274,7 +286,7 @@ def getWorkspaceId()
end

def getWorkspaceCloud()
begin
begin
workspaceDomain = ENV["DOMAIN"]
workspaceCloud = "AzureCloud"
if workspaceDomain.casecmp("opinsights.azure.com") == 0
Expand Down
Loading