diff --git a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb index 102cb05f2..8cb6f603e 100644 --- a/source/plugins/ruby/CAdvisorMetricsAPIClient.rb +++ b/source/plugins/ruby/CAdvisorMetricsAPIClient.rb @@ -66,6 +66,7 @@ class CAdvisorMetricsAPIClient #cadvisor ports @@CADVISOR_SECURE_PORT = "10250" @@CADVISOR_NON_SECURE_PORT = "10255" + def initialize end @@ -86,40 +87,40 @@ def getPodsFromCAdvisor(winNode: nil) end def getBaseCAdvisorUri(winNode) - cAdvisorSecurePort = isCAdvisorOnSecurePort() + cAdvisorSecurePort = isCAdvisorOnSecurePort() + + if !!cAdvisorSecurePort == true + defaultHost = "https://localhost:#{@@CADVISOR_SECURE_PORT}" + else + defaultHost = "http://localhost:#{@@CADVISOR_NON_SECURE_PORT}" + end + + if !winNode.nil? + nodeIP = winNode["InternalIP"] + else + nodeIP = ENV["NODE_IP"] + end + if !nodeIP.nil? + @Log.info("Using #{nodeIP} for CAdvisor Host") if !!cAdvisorSecurePort == true - defaultHost = "https://localhost:#{@@CADVISOR_SECURE_PORT}" + return "https://#{nodeIP}:#{@@CADVISOR_SECURE_PORT}" else - defaultHost = "http://localhost:#{@@CADVISOR_NON_SECURE_PORT}" + return "http://#{nodeIP}:#{@@CADVISOR_NON_SECURE_PORT}" end - + else + @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost}") if !winNode.nil? - nodeIP = winNode["InternalIP"] - else - nodeIP = ENV["NODE_IP"] - end - - if !nodeIP.nil? - @Log.info("Using #{nodeIP} for CAdvisor Host") - if !!cAdvisorSecurePort == true - return "https://#{nodeIP}:#{@@CADVISOR_SECURE_PORT}" - else - return "http://#{nodeIP}:#{@@CADVISOR_NON_SECURE_PORT}" - end + return nil else - @Log.warn ("NODE_IP environment variable not set. Using default as : #{defaultHost}") - if !winNode.nil? - return nil - else - return defaultHost - end + return defaultHost end + end end def getCAdvisorUri(winNode, relativeUri) - baseUri = getBaseCAdvisorUri(winNode) - return baseUri + relativeUri + baseUri = getBaseCAdvisorUri(winNode) + return baseUri + relativeUri end def getMetrics(winNode: nil, metricTime: Time.now.utc.iso8601) @@ -254,20 +255,20 @@ def getContainerCpuMetricItems(metricJSON, hostName, cpuMetricNameToCollect, met elsif (!@containerLogsRoute.nil? && !@containerLogsRoute.empty?) telemetryProps["containerLogsRoute"] = @containerLogsRoute end - #telemetry about health model - if (!@hmEnabled.nil? && !@hmEnabled.empty?) + #telemetry about health model + if (!@hmEnabled.nil? && !@hmEnabled.empty?) telemetryProps["hmEnabled"] = @hmEnabled - end - #telemetry for npm integration - if (!@npmIntegrationAdvanced.nil? && !@npmIntegrationAdvanced.empty?) - telemetryProps["int-npm-a"] = "1" - elsif (!@npmIntegrationBasic.nil? && !@npmIntegrationBasic.empty?) - telemetryProps["int-npm-b"] = "1" - end - #telemetry for Container log schema version clusterContainerLogSchemaVersion - if (!@clusterContainerLogSchemaVersion.nil? && !@clusterContainerLogSchemaVersion.empty?) + end + #telemetry for npm integration + if (!@npmIntegrationAdvanced.nil? && !@npmIntegrationAdvanced.empty?) + telemetryProps["int-npm-a"] = "1" + elsif (!@npmIntegrationBasic.nil? && !@npmIntegrationBasic.empty?) + telemetryProps["int-npm-b"] = "1" + end + #telemetry for Container log schema version clusterContainerLogSchemaVersion + if (!@clusterContainerLogSchemaVersion.nil? && !@clusterContainerLogSchemaVersion.empty?) telemetryProps["containerLogVer"] = @clusterContainerLogSchemaVersion - end + end ApplicationInsightsUtility.sendMetricTelemetry(metricNametoReturn, metricValue, telemetryProps) end end @@ -308,8 +309,8 @@ def getInsightsMetrics(winNode: nil, metricTime: Time.now.utc.iso8601) end if !metricInfo.nil? metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryTotal", "containerGpumemoryTotalBytes", metricTime)) - metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryUsed","containerGpumemoryUsedBytes", metricTime)) - metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "dutyCycle","containerGpuDutyCycle", metricTime)) + metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "memoryUsed", "containerGpumemoryUsedBytes", metricTime)) + metricDataItems.concat(getContainerGpuMetricsAsInsightsMetrics(metricInfo, hostName, "dutyCycle", "containerGpuDutyCycle", metricTime)) metricDataItems.concat(getPersistentVolumeMetrics(metricInfo, hostName, "usedBytes", Constants::PV_USED_BYTES, metricTime)) else @@ -332,7 +333,6 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric begin metricInfo = metricJSON metricInfo["pods"].each do |pod| - podNamespace = pod["podRef"]["namespace"] excludeNamespace = false if (podNamespace.downcase == "kube-system") && @pvKubeSystemCollectionMetricsEnabled == "false" @@ -356,11 +356,11 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric metricItem["Computer"] = hostName metricItem["Name"] = metricNameToReturn metricItem["Value"] = volume[metricNameToCollect] - metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN metricItem["Namespace"] = Constants::INSIGTHTSMETRICS_TAGS_PV_NAMESPACE - + metricTags = {} - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName metricTags[Constants::INSIGHTSMETRICS_TAGS_POD_UID] = podUid metricTags[Constants::INSIGHTSMETRICS_TAGS_POD_NAME] = podName @@ -370,7 +370,7 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric metricTags[Constants::INSIGHTSMETRICS_TAGS_PV_CAPACITY_BYTES] = volume["capacityBytes"] metricItem["Tags"] = metricTags - + metricItems.push(metricItem) end end @@ -395,7 +395,6 @@ def getPersistentVolumeMetrics(metricJSON, hostName, metricNameToCollect, metric return metricItems end - def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCollect, metricNametoReturn, metricPollTime) metricItems = [] clusterId = KubernetesApiClient.getClusterId @@ -415,18 +414,17 @@ def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCo if (!accelerator[metricNameToCollect].nil?) #empty check is invalid for non-strings containerName = container["name"] metricValue = accelerator[metricNameToCollect] - metricItem = {} metricItem["CollectionTime"] = metricPollTime metricItem["Computer"] = hostName metricItem["Name"] = metricNametoReturn metricItem["Value"] = metricValue - metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN + metricItem["Origin"] = Constants::INSIGHTSMETRICS_TAGS_ORIGIN metricItem["Namespace"] = Constants::INSIGHTSMETRICS_TAGS_GPU_NAMESPACE - + metricTags = {} - metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID ] = clusterId + metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERID] = clusterId metricTags[Constants::INSIGHTSMETRICS_TAGS_CLUSTERNAME] = clusterName metricTags[Constants::INSIGHTSMETRICS_TAGS_CONTAINER_NAME] = podUid + "/" + containerName #metricTags[Constants::INSIGHTSMETRICS_TAGS_K8SNAMESPACE] = podNameSpace @@ -442,9 +440,9 @@ def getContainerGpuMetricsAsInsightsMetrics(metricJSON, hostName, metricNameToCo if (!accelerator["id"].nil? && !accelerator["id"].empty?) metricTags[Constants::INSIGHTSMETRICS_TAGS_GPU_ID] = accelerator["id"] end - + metricItem["Tags"] = metricTags - + metricItems.push(metricItem) end end @@ -921,13 +919,13 @@ def getResponse(winNode, relativeUri) uri = URI.parse(cAdvisorUri) if isCAdvisorOnSecurePort() Net::HTTP.start(uri.host, uri.port, - :use_ssl => true, :open_timeout => 20, :read_timeout => 40, - :ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", - :verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http| - cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) - cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}" - response = http.request(cAdvisorApiRequest) - @Log.info "Got response code #{response.code} from #{uri.request_uri}" + :use_ssl => true, :open_timeout => 20, :read_timeout => 40, + :ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", + :verify_mode => OpenSSL::SSL::VERIFY_NONE) do |http| + cAdvisorApiRequest = Net::HTTP::Get.new(uri.request_uri) + cAdvisorApiRequest["Authorization"] = "Bearer #{bearerToken}" + response = http.request(cAdvisorApiRequest) + @Log.info "Got response code #{response.code} from #{uri.request_uri}" end else Net::HTTP.start(uri.host, uri.port, :use_ssl => false, :open_timeout => 20, :read_timeout => 40) do |http| @@ -940,19 +938,24 @@ def getResponse(winNode, relativeUri) rescue => error @Log.warn("CAdvisor api request for #{cAdvisorUri} failed: #{error}") telemetryProps = {} - telemetryProps["Computer"] = winNode["Hostname"] + if !winNode.nil? + hostName = winNode["Hostname"] + else + hostName = (OMS::Common.get_hostname) + end + telemetryProps["Computer"] = hostName ApplicationInsightsUtility.sendExceptionTelemetry(error, telemetryProps) end return response end def isCAdvisorOnSecurePort - cAdvisorSecurePort = false - # Check to see whether omsagent needs to use 10255(insecure) port or 10250(secure) port - if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true" - cAdvisorSecurePort = true - end - return cAdvisorSecurePort + cAdvisorSecurePort = false + # Check to see whether omsagent needs to use 10255(insecure) port or 10250(secure) port + if !@cAdvisorMetricsSecurePort.nil? && @cAdvisorMetricsSecurePort == "true" + cAdvisorSecurePort = true + end + return cAdvisorSecurePort end end end diff --git a/source/plugins/ruby/kubelet_utils.rb b/source/plugins/ruby/kubelet_utils.rb index bd2bd75b7..e2c731b79 100644 --- a/source/plugins/ruby/kubelet_utils.rb +++ b/source/plugins/ruby/kubelet_utils.rb @@ -20,12 +20,12 @@ def get_node_capacity response = CAdvisorMetricsAPIClient.getAllMetricsCAdvisor(winNode: nil) if !response.nil? && !response.body.nil? - all_metrics = response.body.split("\n") - #cadvisor machine metrics can exist with (>=1.19) or without dimensions (<1.19) + all_metrics = response.body.split("\n") + #cadvisor machine metrics can exist with (>=1.19) or without dimensions (<1.19) #so just checking startswith of metric name would be good enough to pick the metric value from exposition format - cpu_capacity = all_metrics.select{|m| m.start_with?('machine_cpu_cores') }.first.split.last.to_f * 1000 + cpu_capacity = all_metrics.select { |m| m.start_with?("machine_cpu_cores") }.first.split.last.to_f * 1000 @log.info "CPU Capacity #{cpu_capacity}" - memory_capacity_e = all_metrics.select{|m| m.start_with?('machine_memory_bytes') }.first.split.last + memory_capacity_e = all_metrics.select { |m| m.start_with?("machine_memory_bytes") }.first.split.last memory_capacity = BigDecimal(memory_capacity_e).to_f @log.info "Memory Capacity #{memory_capacity}" return [cpu_capacity, memory_capacity] @@ -89,9 +89,9 @@ def get_all_container_limits @log.info "cpuLimit: #{cpuLimit}" @log.info "memoryLimit: #{memoryLimit}" # Get cpu limit in nanocores - containerCpuLimitHash[key] = !cpuLimit.nil? ? KubernetesApiClient.getMetricNumericValue("cpu", cpuLimit) : 0 + containerCpuLimitHash[key] = !cpuLimit.nil? ? KubernetesApiClient.getMetricNumericValue("cpu", cpuLimit) : nil # Get memory limit in bytes - containerMemoryLimitHash[key] = !memoryLimit.nil? ? KubernetesApiClient.getMetricNumericValue("memory", memoryLimit) : 0 + containerMemoryLimitHash[key] = !memoryLimit.nil? ? KubernetesApiClient.getMetricNumericValue("memory", memoryLimit) : nil end end end