diff --git a/installer/conf/telegraf.conf b/installer/conf/telegraf.conf index 4883de81b..cd22a56b4 100644 --- a/installer/conf/telegraf.conf +++ b/installer/conf/telegraf.conf @@ -566,7 +566,8 @@ ## Use TLS but skip chain & host verification insecure_skip_verify = true #tagexclude = ["AgentVersion","AKS_RESOURCE_ID","ACS_RESOURCE_NAME", "Region", "ClusterName", "ClusterType", "Computer", "ControllerType"] - + [inputs.prometheus.tagpass] + operation_type = ["create_container", "remove_container", "pull_image"] ## prometheus custom metrics [[inputs.prometheus]] diff --git a/installer/scripts/tomlparser-prom-customconfig.rb b/installer/scripts/tomlparser-prom-customconfig.rb index ab868f1a9..7aad580ee 100644 --- a/installer/scripts/tomlparser-prom-customconfig.rb +++ b/installer/scripts/tomlparser-prom-customconfig.rb @@ -47,7 +47,7 @@ def parseConfigMap end def checkForTypeArray(arrayValue, arrayType) - if (arrayValue.nil? || (arrayValue.kind_of?(Array) && arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType))) + if (arrayValue.nil? || (arrayValue.kind_of?(Array) && ((arrayValue.length == 0) || (arrayValue.length > 0 && arrayValue[0].kind_of?(arrayType))))) return true else return false diff --git a/source/code/go/src/plugins/oms.go b/source/code/go/src/plugins/oms.go index 6d78455bd..01aab85b4 100644 --- a/source/code/go/src/plugins/oms.go +++ b/source/code/go/src/plugins/oms.go @@ -184,11 +184,11 @@ type laKubeMonAgentEvents struct { } type KubeMonAgentEventTags struct { - PodName string - ContainerId string - FirstOccurance string - LastOccurance string - Count int + PodName string + ContainerId string + FirstOccurrence string + LastOccurrence string + Count int } type KubeMonAgentEventBlob struct { @@ -259,7 +259,14 @@ func updateContainerImageNameMaps() { } for _, pod := range pods.Items { - for _, status := range pod.Status.ContainerStatuses { + podContainerStatuses := pod.Status.ContainerStatuses + + // Doing this to include init container logs as well + podInitContainerStatuses := pod.Status.InitContainerStatuses + if (podInitContainerStatuses != nil) && (len(podInitContainerStatuses) > 0) { + podContainerStatuses = append(podContainerStatuses, podInitContainerStatuses...) + } + for _, status := range podContainerStatuses { lastSlashIndex := strings.LastIndex(status.ContainerID, "/") containerID := status.ContainerID[lastSlashIndex+1 : len(status.ContainerID)] image := status.Image @@ -344,22 +351,22 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K if val, ok := ConfigErrorEvent[logRecordString]; ok { Log("In config error existing hash update\n") eventCount := val.Count - eventFirstOccurance := val.FirstOccurance + eventFirstOccurrence := val.FirstOccurrence ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventFirstOccurance, - LastOccurance: eventTimeStamp, - Count: eventCount + 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventFirstOccurrence, + LastOccurrence: eventTimeStamp, + Count: eventCount + 1, } } else { ConfigErrorEvent[logRecordString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventTimeStamp, - LastOccurance: eventTimeStamp, - Count: 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventTimeStamp, + LastOccurrence: eventTimeStamp, + Count: 1, } } @@ -374,22 +381,22 @@ func populateKubeMonAgentEventHash(record map[interface{}]interface{}, errType K if val, ok := PromScrapeErrorEvent[splitString]; ok { Log("In config error existing hash update\n") eventCount := val.Count - eventFirstOccurance := val.FirstOccurance + eventFirstOccurrence := val.FirstOccurrence PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventFirstOccurance, - LastOccurance: eventTimeStamp, - Count: eventCount + 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventFirstOccurrence, + LastOccurrence: eventTimeStamp, + Count: eventCount + 1, } } else { PromScrapeErrorEvent[splitString] = KubeMonAgentEventTags{ - PodName: podName, - ContainerId: containerID, - FirstOccurance: eventTimeStamp, - LastOccurance: eventTimeStamp, - Count: 1, + PodName: podName, + ContainerId: containerID, + FirstOccurrence: eventTimeStamp, + LastOccurrence: eventTimeStamp, + Count: 1, } } } @@ -756,16 +763,18 @@ func PostDataHelper(tailPluginRecords []map[interface{}]interface{}) int { FlushedRecordsSize += float64(len(stringMap["LogEntry"])) dataItems = append(dataItems, dataItem) - loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) - if e != nil { - message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) - Log(message) - SendException(message) - } else { - ltncy := float64(start.Sub(loggedTime) / time.Millisecond) - if ltncy >= maxLatency { - maxLatency = ltncy - maxLatencyContainer = dataItem.Name + "=" + dataItem.ID + if dataItem.LogEntryTimeStamp != "" { + loggedTime, e := time.Parse(time.RFC3339, dataItem.LogEntryTimeStamp) + if e != nil { + message := fmt.Sprintf("Error while converting LogEntryTimeStamp for telemetry purposes: %s", e.Error()) + Log(message) + SendException(message) + } else { + ltncy := float64(start.Sub(loggedTime) / time.Millisecond) + if ltncy >= maxLatency { + maxLatency = ltncy + maxLatencyContainer = dataItem.Name + "=" + dataItem.ID + } } } } diff --git a/source/code/plugin/KubernetesApiClient.rb b/source/code/plugin/KubernetesApiClient.rb index 48b25bf14..be1a51791 100644 --- a/source/code/plugin/KubernetesApiClient.rb +++ b/source/code/plugin/KubernetesApiClient.rb @@ -356,9 +356,19 @@ def getContainerResourceRequestsAndLimits(metricJSON, metricCategory, metricName else podUid = pod["metadata"]["uid"] end - if (!pod["spec"]["containers"].nil? && !pod["spec"]["nodeName"].nil?) + + podContainers = [] + if !pod["spec"]["containers"].nil? && !pod["spec"]["containers"].empty? + podContainers = podContainers + pod["spec"]["containers"] + end + # Adding init containers to the record list as well. + if !pod["spec"]["initContainers"].nil? && !pod["spec"]["initContainers"].empty? + podContainers = podContainers + pod["spec"]["initContainers"] + end + + if (!podContainers.nil? && !podContainers.empty? && !pod["spec"]["nodeName"].nil?) nodeName = pod["spec"]["nodeName"] - pod["spec"]["containers"].each do |container| + podContainers.each do |container| containerName = container["name"] metricTime = Time.now.utc.iso8601 #2018-01-30T19:36:14Z if (!container["resources"].nil? && !container["resources"].empty? && !container["resources"][metricCategory].nil? && !container["resources"][metricCategory][metricNameToCollect].nil?) diff --git a/source/code/plugin/in_kube_events.rb b/source/code/plugin/in_kube_events.rb index 5538ba4aa..e1fdc5df6 100644 --- a/source/code/plugin/in_kube_events.rb +++ b/source/code/plugin/in_kube_events.rb @@ -47,17 +47,20 @@ def enumerate(eventList = nil) currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 - if eventList.nil? - $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") - events = JSON.parse(KubernetesApiClient.getKubeResourceInfo("events").body) - $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") - else - events = eventList + + events = eventList + $log.info("in_kube_events::enumerate : Getting events from Kube API @ #{Time.now.utc.iso8601}") + eventInfo = KubernetesApiClient.getKubeResourceInfo("events") + $log.info("in_kube_events::enumerate : Done getting events from Kube API @ #{Time.now.utc.iso8601}") + + if !eventInfo.nil? + events = JSON.parse(eventInfo.body) end + eventQueryState = getEventQueryState newEventQueryState = [] begin - if (!events.empty? && !events["items"].nil?) + if (!events.nil? && !events.empty? && !events["items"].nil?) eventStream = MultiEventStream.new events["items"].each do |items| record = {} diff --git a/source/code/plugin/in_kube_nodes.rb b/source/code/plugin/in_kube_nodes.rb index 42bc13b68..0a0fd9d2e 100644 --- a/source/code/plugin/in_kube_nodes.rb +++ b/source/code/plugin/in_kube_nodes.rb @@ -61,11 +61,19 @@ def enumerate emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 telemetrySent = false + + nodeInventory = nil + $log.info("in_kube_nodes::enumerate : Getting nodes from Kube API @ #{Time.now.utc.iso8601}") - nodeInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("nodes").body) + nodeInfo = KubernetesApiClient.getKubeResourceInfo("nodes") $log.info("in_kube_nodes::enumerate : Done getting nodes from Kube API @ #{Time.now.utc.iso8601}") + + if !nodeInfo.nil? + nodeInventory = JSON.parse(nodeInfo.body) + end + begin - if (!nodeInventory.empty?) + if (!nodeInventory.nil? && !nodeInventory.empty?) eventStream = MultiEventStream.new containerNodeInventoryEventStream = MultiEventStream.new if !nodeInventory["items"].nil? @@ -95,7 +103,6 @@ def enumerate record["KubernetesProviderID"] = "onprem" end - # Refer to https://kubernetes.io/docs/concepts/architecture/nodes/#condition for possible node conditions. # We check the status of each condition e.g. {"type": "OutOfDisk","status": "False"} . Based on this we # populate the KubeNodeInventory Status field. A possible value for this field could be "Ready OutofDisk" diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index f41ce9095..766831a66 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -48,13 +48,15 @@ def shutdown end def enumerate(podList = nil) - if podList.nil? - $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") - podInventory = JSON.parse(KubernetesApiClient.getKubeResourceInfo("pods").body) - $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") - else - podInventory = podList + podInventory = podList + $log.info("in_kube_podinventory::enumerate : Getting pods from Kube API @ #{Time.now.utc.iso8601}") + podInfo = KubernetesApiClient.getKubeResourceInfo("pods") + $log.info("in_kube_podinventory::enumerate : Done getting pods from Kube API @ #{Time.now.utc.iso8601}") + + if !podInfo.nil? + podInventory = JSON.parse(podInfo.body) end + begin if (!podInventory.empty? && podInventory.key?("items") && !podInventory["items"].empty?) #get pod inventory & services @@ -137,8 +139,16 @@ def getContainerEnvironmentVariables(pod, clusterCollectEnvironmentVar) begin podSpec = pod["spec"] containerEnvHash = {} - if !podSpec.nil? && !podSpec["containers"].nil? - podSpec["containers"].each do |container| + podContainersEnv = [] + if !podSpec["containers"].nil? && !podSpec["containers"].empty? + podContainersEnv = podContainersEnv + podSpec["containers"] + end + # Adding init containers to the record list as well. + if !podSpec["initContainers"].nil? && !podSpec["initContainers"].empty? + podContainersEnv = podContainersEnv + podSpec["initContainers"] + end + if !podContainersEnv.nil? && !podContainersEnv.empty? + podContainersEnv.each do |container| if !clusterCollectEnvironmentVar.nil? && !clusterCollectEnvironmentVar.empty? && clusterCollectEnvironmentVar.casecmp("false") == 0 containerEnvHash[container["name"]] = ["AZMON_CLUSTER_COLLECT_ENV_VAR=FALSE"] else @@ -289,8 +299,19 @@ def parse_and_emit_records(podInventory, serviceList) end podRestartCount = 0 record["PodRestartCount"] = 0 - if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start - items["status"]["containerStatuses"].each do |container| + + podContainers = [] + if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? + podContainers = podContainers + items["status"]["containerStatuses"] + end + # Adding init containers to the record list as well. + if items["status"].key?("initContainerStatuses") && !items["status"]["initContainerStatuses"].empty? + podContainers = podContainers + items["status"]["initContainerStatuses"] + end + + # if items["status"].key?("containerStatuses") && !items["status"]["containerStatuses"].empty? #container status block start + if !podContainers.empty? #container status block start + podContainers.each do |container| containerRestartCount = 0 #container Id is of the form #docker://dfd9da983f1fd27432fb2c1fe3049c0a1d25b1c697b2dc1a530c986e58b16527 diff --git a/source/code/plugin/in_kube_services.rb b/source/code/plugin/in_kube_services.rb index 8b0a013e4..7cd703620 100644 --- a/source/code/plugin/in_kube_services.rb +++ b/source/code/plugin/in_kube_services.rb @@ -46,11 +46,19 @@ def enumerate currentTime = Time.now emitTime = currentTime.to_f batchTime = currentTime.utc.iso8601 + + serviceList = nil + $log.info("in_kube_services::enumerate : Getting services from Kube API @ #{Time.now.utc.iso8601}") - serviceList = JSON.parse(KubernetesApiClient.getKubeResourceInfo("services").body) + serviceInfo = KubernetesApiClient.getKubeResourceInfo("services") $log.info("in_kube_services::enumerate : Done getting services from Kube API @ #{Time.now.utc.iso8601}") + + if !serviceInfo.nil? + serviceList = JSON.parse(serviceInfo.body) + end + begin - if (!serviceList.empty?) + if (!serviceList.nil? && !serviceList.empty?) eventStream = MultiEventStream.new serviceList["items"].each do |items| record = {}