Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 67 additions & 64 deletions source/plugins/ruby/CAdvisorMetricsAPIClient.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class CAdvisorMetricsAPIClient
#cadvisor ports
@@CADVISOR_SECURE_PORT = "10250"
@@CADVISOR_NON_SECURE_PORT = "10255"

def initialize
end

Expand All @@ -86,40 +87,40 @@ def getPodsFromCAdvisor(winNode: nil)
end

def getBaseCAdvisorUri(winNode)
cAdvisorSecurePort = isCAdvisorOnSecurePort()
cAdvisorSecurePort = isCAdvisorOnSecurePort()

if !!cAdvisorSecurePort == true
defaultHost = "https://localhost:#{@@CADVISOR_SECURE_PORT}"
else
defaultHost = "http://localhost:#{@@CADVISOR_NON_SECURE_PORT}"
end

if !winNode.nil?
nodeIP = winNode["InternalIP"]
else
nodeIP = ENV["NODE_IP"]
end

if !nodeIP.nil?
@Log.info("Using #{nodeIP} for CAdvisor Host")
if !!cAdvisorSecurePort == true
defaultHost = "https://localhost:#{@@CADVISOR_SECURE_PORT}"
return "https://#{nodeIP}:#{@@CADVISOR_SECURE_PORT}"
else
defaultHost = "http://localhost:#{@@CADVISOR_NON_SECURE_PORT}"
return "http://#{nodeIP}:#{@@CADVISOR_NON_SECURE_PORT}"
end

else
@Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost}")
if !winNode.nil?
nodeIP = winNode["InternalIP"]
else
nodeIP = ENV["NODE_IP"]
end

if !nodeIP.nil?
@Log.info("Using #{nodeIP} for CAdvisor Host")
if !!cAdvisorSecurePort == true
return "https://#{nodeIP}:#{@@CADVISOR_SECURE_PORT}"
else
return "http://#{nodeIP}:#{@@CADVISOR_NON_SECURE_PORT}"
end
return nil
else
@Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost}")
if !winNode.nil?
return nil
else
return defaultHost
end
return defaultHost
end
end
end

def getCAdvisorUri(winNode, relativeUri)
baseUri = getBaseCAdvisorUri(winNode)
return baseUri + relativeUri
baseUri = getBaseCAdvisorUri(winNode)
return baseUri + relativeUri
end

def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601)
Expand Down Expand Up @@ -254,20 +255,20 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met
elsif (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?)
telemetryProps["containerLogsRoute"] = @containerLogsRoute
end
#telemetry about health model
if (!@hmEnabled.nil? && !@hmEnabled.empty?)
#telemetry about health model
if (!@hmEnabled.nil? && !@hmEnabled.empty?)
telemetryProps["hmEnabled"] = @hmEnabled
end
#telemetry for npm integration
if (!@npmIntegrationAdvanced.nil? && !@npmIntegrationAdvanced.empty?)
telemetryProps["int-npm-a"] = "1"
elsif (!@npmIntegrationBasic.nil? && !@npmIntegrationBasic.empty?)
telemetryProps["int-npm-b"] = "1"
end
#telemetry for Container log schema version clusterContainerLogSchemaVersion
if (!@clusterContainerLogSchemaVersion.nil? && !@clusterContainerLogSchemaVersion.empty?)
end
#telemetry for npm integration
if (!@npmIntegrationAdvanced.nil? && !@npmIntegrationAdvanced.empty?)
telemetryProps["int-npm-a"] = "1"
elsif (!@npmIntegrationBasic.nil? && !@npmIntegrationBasic.empty?)
telemetryProps["int-npm-b"] = "1"
end
#telemetry for Container log schema version clusterContainerLogSchemaVersion
if (!@clusterContainerLogSchemaVersion.nil? && !@clusterContainerLogSchemaVersion.empty?)
telemetryProps["containerLogVer"] = @clusterContainerLogSchemaVersion
end
end
ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps)
end
end
Expand Down Expand Up @@ -308,8 +309,8 @@ def getInsightsMetrics(winNode: nil, metricTime: Time.now.utc.iso8601)
end
if !metricInfo.nil?
metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryTotal", "containerGpumemoryTotalBytes", metricTime))
metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryUsed","containerGpumemoryUsedBytes", metricTime))
metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "dutyCycle","containerGpuDutyCycle", metricTime))
metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryUsed", "containerGpumemoryUsedBytes", metricTime))
metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "dutyCycle", "containerGpuDutyCycle", metricTime))

metricDataItems.concat(getPersistentVolumeMetrics(metricInfo, hostName, "usedBytes", Constants::PV_USED_BYTES, metricTime))
else
Expand All @@ -332,7 +333,6 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric
begin
metricInfo = metricJSON
metricInfo["pods"].each do |pod|

podNamespace = pod["podRef"]["namespace"]
excludeNamespace = false
if (podNamespace.downcase == "kube-system") && @pvKubeSystemCollectionMetricsEnabled == "false"
Expand All @@ -356,11 +356,11 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric
metricItem["Computer"] = hostName
metricItem["Name"] = metricNameToReturn
metricItem["Value"] = volume[metricNameToCollect]
metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
metricItem["Namespace"] = Constants::INSIGTHTSMETRICS_TAGS_PV_NAMESPACE

metricTags = {}
metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId
metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId
metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName
metricTags[Constants::INSIGHTSMETRICS_TAGS_POD_UID] = podUid
metricTags[Constants::INSIGHTSMETRICS_TAGS_POD_NAME] = podName
Expand All @@ -370,7 +370,7 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric
metricTags[Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES] = volume["capacityBytes"]

metricItem["Tags"] = metricTags

metricItems.push(metricItem)
end
end
Expand All @@ -395,7 +395,6 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric
return metricItems
end


def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCollect, metricNametoReturn, metricPollTime)
metricItems = []
clusterId = KubernetesApiClient.getClusterId
Expand All @@ -415,18 +414,17 @@ def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCo
if (!accelerator[metricNameToCollect].nil?) #empty check is invalid for non-strings
containerName = container["name"]
metricValue = accelerator[metricNameToCollect]


metricItem = {}
metricItem["CollectionTime"] = metricPollTime
metricItem["Computer"] = hostName
metricItem["Name"] = metricNametoReturn
metricItem["Value"] = metricValue
metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN
metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE

metricTags = {}
metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId
metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId
metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName
metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName
#metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace
Expand All @@ -442,9 +440,9 @@ def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCo
if (!accelerator["id"].nil? && !accelerator["id"].empty?)
metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_ID] = accelerator["id"]
end

metricItem["Tags"] = metricTags

metricItems.push(metricItem)
end
end
Expand Down Expand Up @@ -921,13 +919,13 @@ def getResponse(winNode, relativeUri)
uri = URI.parse(cAdvisorUri)
if isCAdvisorOnSecurePort()
Net::HTTP.start(uri.host, uri.port,
:use_ssl => true, :open_timeout => 20, :read_timeout => 40,
:ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
:verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http|
cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}"
response = http.request(cAdvisorApiRequest)
@Log.info "Got response code #{response.code} from #{uri.request_uri}"
:use_ssl => true, :open_timeout => 20, :read_timeout => 40,
:ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
:verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http|
cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri)
cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}"
response = http.request(cAdvisorApiRequest)
@Log.info "Got response code #{response.code} from #{uri.request_uri}"
end
else
Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http|
Expand All @@ -940,19 +938,24 @@ def getResponse(winNode, relativeUri)
rescue => error
@Log.warn("CAdvisor api request for #{cAdvisorUri} failed: #{error}")
telemetryProps = {}
telemetryProps["Computer"] = winNode["Hostname"]
if !winNode.nil?
hostName = winNode["Hostname"]
else
hostName = (OMS::Common.get_hostname)
end
telemetryProps["Computer"] = hostName
ApplicationInsightsUtility.sendExceptionTelemetry(error, telemetryProps)
end
return response
end

def isCAdvisorOnSecurePort
cAdvisorSecurePort = false
# Check to see whether omsagent needs to use 10255(insecure) port or 10250(secure) port
if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true"
cAdvisorSecurePort = true
end
return cAdvisorSecurePort
cAdvisorSecurePort = false
# Check to see whether omsagent needs to use 10255(insecure) port or 10250(secure) port
if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true"
cAdvisorSecurePort = true
end
return cAdvisorSecurePort
end
end
end
12 changes: 6 additions & 6 deletions source/plugins/ruby/kubelet_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ def get_node_capacity

response = CAdvisorMetricsAPIClient.getAllMetricsCAdvisor(winNode: nil)
if !response.nil? && !response.body.nil?
all_metrics = response.body.split("\n")
#cadvisor machine metrics can exist with (>=1.19) or without dimensions (<1.19)
all_metrics = response.body.split("\n")
#cadvisor machine metrics can exist with (>=1.19) or without dimensions (<1.19)
#so just checking startswith of metric name would be good enough to pick the metric value from exposition format
cpu_capacity = all_metrics.select{|m| m.start_with?('machine_cpu_cores') }.first.split.last.to_f * 1000
cpu_capacity = all_metrics.select { |m| m.start_with?("machine_cpu_cores") }.first.split.last.to_f * 1000
@log.info "CPU Capacity #{cpu_capacity}"
memory_capacity_e = all_metrics.select{|m| m.start_with?('machine_memory_bytes') }.first.split.last
memory_capacity_e = all_metrics.select { |m| m.start_with?("machine_memory_bytes") }.first.split.last
memory_capacity = BigDecimal(memory_capacity_e).to_f
@log.info "Memory Capacity #{memory_capacity}"
return [cpu_capacity, memory_capacity]
Expand Down Expand Up @@ -89,9 +89,9 @@ def get_all_container_limits
@log.info "cpuLimit: #{cpuLimit}"
@log.info "memoryLimit: #{memoryLimit}"
# Get cpu limit in nanocores
containerCpuLimitHash[key] = !cpuLimit.nil? ? KubernetesApiClient.getMetricNumericValue("cpu", cpuLimit) : 0
containerCpuLimitHash[key] = !cpuLimit.nil? ? KubernetesApiClient.getMetricNumericValue("cpu", cpuLimit) : nil
# Get memory limit in bytes
containerMemoryLimitHash[key] = !memoryLimit.nil? ? KubernetesApiClient.getMetricNumericValue("memory", memoryLimit) : 0
containerMemoryLimitHash[key] = !memoryLimit.nil? ? KubernetesApiClient.getMetricNumericValue("memory", memoryLimit) : nil
end
end
end
Expand Down