diff --git a/README.md b/README.md index 007f92d92..75b2d8665 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,13 @@ additional questions or comments. Note : The agent version(s) below has dates (ciprod), which indicate the agent build dates (not release dates) +### 01/07/2020 - +##### Version microsoft/oms:ciprod01072020 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod01072020 +## Code change log +- Switch between ports 10255 (old) and 10250 (new) for cAdvisor, to support both older and newer versions of Kubernetes +## Customer Impact +- Node CPU, node memory, container CPU and container memory metrics were previously obtained by querying the kubelet read-only port (http://$NODE_IP:10255). The agent now also supports getting these metrics from the kubelet secure port (https://$NODE_IP:10250). During agent startup, it checks connectivity to the kubelet secure port (https://$NODE_IP:10250); if that check fails, the metrics source defaults to the read-only port (http://$NODE_IP:10255). + ### 12/04/2019 - ##### Version microsoft/oms:ciprod12042019 Version mcr.microsoft.com/azuremonitor/containerinsights/ciprod:ciprod12042019 - Fix scheduler for all input plugins diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index cd22a56b4..f9dc3fb6a 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -531,7 +531,7 @@ [[inputs.prometheus]] name_prefix="container.azm.ms/" ## An array of urls to scrape metrics from. 
- urls = ["http://$NODE_IP:10255/metrics"] + urls = ["$CADVISOR_METRICS_URL"] fieldpass = ["kubelet_docker_operations", "kubelet_docker_operations_errors"] metric_version = 2 diff --git a/source/code/plugin/CAdvisorMetricsAPIClient.rb b/source/code/plugin/CAdvisorMetricsAPIClient.rb index be61b8b8f..8b0105a6f 100644 --- a/source/code/plugin/CAdvisorMetricsAPIClient.rb +++ b/source/code/plugin/CAdvisorMetricsAPIClient.rb @@ -2,7 +2,7 @@ # frozen_string_literal: true class CAdvisorMetricsAPIClient - require 'yajl/json_gem' + require "yajl/json_gem" require "logger" require "net/http" require "net/https" @@ -29,6 +29,8 @@ class CAdvisorMetricsAPIClient @dsPromFieldDropCount = ENV["TELEMETRY_DS_PROM_FIELDDROP_LENGTH"] @dsPromUrlCount = ENV["TELEMETRY_DS_PROM_URLS_LENGTH"] + @cAdvisorMetricsSecurePort = ENV["IS_SECURE_CADVISOR_PORT"] + @LogPath = "/var/opt/microsoft/docker-cimprov/log/kubernetes_perf_log.txt" @Log = Logger.new(@LogPath, 2, 10 * 1048576) #keep last 2 files, max log file size = 10M # @@rxBytesLast = nil @@ -63,13 +65,34 @@ def getSummaryStatsFromCAdvisor(winNode) response = nil @Log.info "Getting CAdvisor Uri" begin - cAdvisorUri = getCAdvisorUri(winNode) + cAdvisorSecurePort = false + # Check to see if omsagent needs to use 10255(insecure) port or 10250(secure) port + if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true" + cAdvisorSecurePort = true + end + + cAdvisorUri = getCAdvisorUri(winNode, cAdvisorSecurePort) + bearerToken = File.read("/var/run/secrets/kubernetes.io/serviceaccount/token") + @Log.info "cAdvisorUri: #{cAdvisorUri}" + if !cAdvisorUri.nil? 
uri = URI.parse(cAdvisorUri) - Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40 ) do |http| - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" + if !!cAdvisorSecurePort == true + Net::HTTP.start(uri.host, uri.port, + :use_ssl => true, :open_timeout => 20, :read_timeout => 40, + :ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + :verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}" + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end + else + Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" + end end end rescue => error @@ -81,9 +104,14 @@ def getSummaryStatsFromCAdvisor(winNode) return response end - def getCAdvisorUri(winNode) + def getCAdvisorUri(winNode, cAdvisorSecurePort) begin - defaultHost = "http://localhost:10255" + if !!cAdvisorSecurePort == true + defaultHost = "https://localhost:10250" + else + defaultHost = "http://localhost:10255" + end + relativeUri = "/stats/summary" if !winNode.nil? nodeIP = winNode["InternalIP"] @@ -92,7 +120,11 @@ def getCAdvisorUri(winNode) end if !nodeIP.nil? @Log.info("Using #{nodeIP + relativeUri} for CAdvisor Uri") - return "http://#{nodeIP}:10255" + relativeUri + if !!cAdvisorSecurePort == true + return "https://#{nodeIP}:10250" + relativeUri + else + return "http://#{nodeIP}:10255" + relativeUri + end else @Log.warn ("NODE_IP environment variable not set. 
Using default as : #{defaultHost + relativeUri} ") if !winNode.nil? @@ -104,7 +136,7 @@ def getCAdvisorUri(winNode) end end - def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601 ) + def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601) metricDataItems = [] begin cAdvisorStats = getSummaryStatsFromCAdvisor(winNode) @@ -211,6 +243,7 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met telemetryProps["PodName"] = podName telemetryProps["ContainerName"] = containerName telemetryProps["Computer"] = hostName + telemetryProps["CAdvisorIsSecure"] = @cAdvisorMetricsSecurePort #telemetry about log collections settings if (File.file?(@configMapMountPath)) telemetryProps["clustercustomsettings"] = true