Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ additional questions or comments.

Note: The agent version(s) below have dates (ciprod<mmddyyyy>), which indicate the agent build dates (not release dates).

### 01/07/2020 -
##### Version microsoft/oms:ciprod01072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01072020
## Code change log
- Switch between 10255(old) and 10250(new) ports for cadvisor for older and newer versions of kubernetes
## Customer Impact
- Node cpu, node memory, container cpu and container memory metrics were previously obtained by querying the kubelet read-only port (http://$NODE_IP:10255). The agent now also supports getting these metrics from the kubelet port (https://$NODE_IP:10250). During startup, the agent checks connectivity to the kubelet port (https://$NODE_IP:10250); if that check fails, the metrics source defaults to the read-only port (http://$NODE_IP:10255).

### 12/04/2019 -
##### Version microsoft/oms:ciprod12042019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod12042019
- Fix scheduler for all input plugins
Expand Down
2 changes: 1 addition & 1 deletion installer/conf/telegraf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@
[[inputs.prometheus]]
name_prefix="container.azm.ms/"
## An array of urls to scrape metrics from.
urls = ["http://$NODE_IP:10255/metrics"]
urls = ["$CADVISOR_METRICS_URL"]
fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"]

metric_version = 2
Expand Down
53 changes: 43 additions & 10 deletions source/code/plugin/CAdvisorMetricsAPIClient.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# frozen_string_literal: true

class CAdvisorMetricsAPIClient
require 'yajl/json_gem'
require "yajl/json_gem"
require "logger"
require "net/http"
require "net/https"
Expand All @@ -29,6 +29,8 @@ class CAdvisorMetricsAPIClient
@dsPromFieldDropCount = ENV["TELEMETRY_DS_PROM_FIELDDROP_LENGTH"]
@dsPromUrlCount = ENV["TELEMETRY_DS_PROM_URLS_LENGTH"]

@cAdvisorMetricsSecurePort = ENV["IS_SECURE_CADVISOR_PORT"]

@LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt"
@Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M
# @@rxBytesLast = nil
Expand Down Expand Up @@ -63,13 +65,34 @@ def getSummaryStatsFromCAdvisor(winNode)
response = nil
@Log.info "Getting CAdvisor Uri"
begin
cAdvisorUri = getCAdvisorUri(winNode)
cAdvisorSecurePort = false
# Check to see if omsagent needs to use 10255(insecure) port or 10250(secure) port
if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true"
cAdvisorSecurePort = true
end

cAdvisorUri = getCAdvisorUri(winNode, cAdvisorSecurePort)
bearerToken = File.read("/var/run/secrets/kubernetes.io/serviceaccount/token")
@Log.info "cAdvisorUri: #{cAdvisorUri}"

if !cAdvisorUri.nil?
uri = URI.parse(cAdvisorUri)
Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40 ) do |http|
cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
response = http.request(cAdvisorApiRequest)
@Log.info "Got response code #{response.code} from #{uri.request_uri}"
if !!cAdvisorSecurePort == true
Net::HTTP.start(uri.host, uri.port,
:use_ssl => true, :open_timeout => 20, :read_timeout => 40,
:ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
:verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http|
cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}"
response = http.request(cAdvisorApiRequest)
@Log.info "Got response code #{response.code} from #{uri.request_uri}"
end
else
Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http|
cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
response = http.request(cAdvisorApiRequest)
@Log.info "Got response code #{response.code} from #{uri.request_uri}"
end
end
end
rescue => error
Expand All @@ -81,9 +104,14 @@ def getSummaryStatsFromCAdvisor(winNode)
return response
end

def getCAdvisorUri(winNode)
def getCAdvisorUri(winNode, cAdvisorSecurePort)
begin
defaultHost = "http://localhost:10255"
if !!cAdvisorSecurePort == true
defaultHost = "https://localhost:10250"
else
defaultHost = "http://localhost:10255"
end

relativeUri = "/stats/summary"
if !winNode.nil?
nodeIP = winNode["InternalIP"]
Expand All @@ -92,7 +120,11 @@ def getCAdvisorUri(winNode)
end
if !nodeIP.nil?
@Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri")
return "http://#{nodeIP}:10255" + relativeUri
if !!cAdvisorSecurePort == true
return "https://#{nodeIP}:10250" + relativeUri
else
return "http://#{nodeIP}:10255" + relativeUri
end
else
@Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost + relativeUri} ")
if !winNode.nil?
Expand All @@ -104,7 +136,7 @@ def getCAdvisorUri(winNode)
end
end

def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601 )
def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601)
metricDataItems = []
begin
cAdvisorStats = getSummaryStatsFromCAdvisor(winNode)
Expand Down Expand Up @@ -211,6 +243,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
telemetryProps["PodName"] = podName
telemetryProps["ContainerName"] = containerName
telemetryProps["Computer"] = hostName
telemetryProps["CAdvisorIsSecure"] = @cAdvisorMetricsSecurePort
#telemetry about log collections settings
if (File.file?(@configMapMountPath))
telemetryProps["clustercustomsettings"] = true
Expand Down