From 0885962fc5298643688ffd7307a1ceda4913165f Mon Sep 17 00:00:00 2001 From: David Michelman Date: Tue, 2 Jul 2019 14:27:17 -0700 Subject: [PATCH 1/4] updating the OMS agent to also collect container last state --- source/code/plugin/in_kube_podinventory.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 79490ba7d..707d7f592 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -317,6 +317,19 @@ def parse_and_emit_records(podInventory, serviceList) record["ContainerStatusReason"] = containerStatus[containerStatus.keys[0]]["reason"] end end + + # Record the last state of the container. This may have information on why a container was killed. + if container["lastState"].keys.length > 0 + lastStateName = container["lastState"].keys[0] + record["ContainerLastStatus"] = Hash.new + record["ContainerLastStatus"]["lastState"] = lastStateName # get the name of the last state (ex: terminated) + record["ContainerLastStatus"]["reason"] = container["lastState"][lastStateName]["reason"] # (ex: OOMKilled) + record["ContainerLastStatus"]["startedAt"] = container["lastState"][lastStateName]["startedAt"] # (ex: 2019-07-02T14:58:51Z) + record["ContainerLastStatus"]["finishedAt"] = container["lastState"][lastStateName]["finishedAt"] # (ex: 2019-07-02T14:58:52Z) + else + record["ContainerLastStatus"] = container["lastState"] # this is an empty json hash (hashmap) + end + podRestartCount += containerRestartCount records.push(record.dup) From fb88e514e874591187997208542178a0bffd2126 Mon Sep 17 00:00:00 2001 From: David Michelman Date: Mon, 8 Jul 2019 10:55:56 -0700 Subject: [PATCH 2/4] changed a comment --- source/code/plugin/in_kube_podinventory.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 
707d7f592..fe70e4d6f 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -327,7 +327,7 @@ def parse_and_emit_records(podInventory, serviceList) record["ContainerLastStatus"]["startedAt"] = container["lastState"][lastStateName]["startedAt"] # (ex: 2019-07-02T14:58:51Z) record["ContainerLastStatus"]["finishedAt"] = container["lastState"][lastStateName]["finishedAt"] # (ex: 2019-07-02T14:58:52Z) else - record["ContainerLastStatus"] = container["lastState"] # this is an empty json hash (hashmap) + record["ContainerLastStatus"] = container["lastState"] # this is an empty hash end podRestartCount += containerRestartCount From 3015f9f5d4bb73124f45cc63aaf7485209fc7184 Mon Sep 17 00:00:00 2001 From: David Michelman Date: Mon, 8 Jul 2019 15:43:32 -0700 Subject: [PATCH 3/4] surrounded ContainerLastStatus code in a begin/rescue block --- source/code/plugin/in_kube_podinventory.rb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index fe70e4d6f..4092814a5 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -319,15 +319,19 @@ def parse_and_emit_records(podInventory, serviceList) end # Record the last state of the container. This may have information on why a container was killed. 
- if container["lastState"].keys.length > 0 - lastStateName = container["lastState"].keys[0] + begin + if container["lastState"].keys.length > 0 + lastStateName = container["lastState"].keys[0] + record["ContainerLastStatus"] = Hash.new + record["ContainerLastStatus"]["lastState"] = lastStateName # get the name of the last state (ex: terminated) + record["ContainerLastStatus"]["reason"] = container["lastState"][lastStateName]["reason"] # (ex: OOMKilled) + record["ContainerLastStatus"]["startedAt"] = container["lastState"][lastStateName]["startedAt"] # (ex: 2019-07-02T14:58:51Z) + record["ContainerLastStatus"]["finishedAt"] = container["lastState"][lastStateName]["finishedAt"] # (ex: 2019-07-02T14:58:52Z) + else + record["ContainerLastStatus"] = Hash.new + end + rescue record["ContainerLastStatus"] = Hash.new - record["ContainerLastStatus"]["lastState"] = lastStateName # get the name of the last state (ex: terminated) - record["ContainerLastStatus"]["reason"] = container["lastState"][lastStateName]["reason"] # (ex: OOMKilled) - record["ContainerLastStatus"]["startedAt"] = container["lastState"][lastStateName]["startedAt"] # (ex: 2019-07-02T14:58:51Z) - record["ContainerLastStatus"]["finishedAt"] = container["lastState"][lastStateName]["finishedAt"] # (ex: 2019-07-02T14:58:52Z) - else - record["ContainerLastStatus"] = container["lastState"] # this is an empty hash end podRestartCount += containerRestartCount From c401e9cbd9d9ef612230a3cc88a263748e96c866 Mon Sep 17 00:00:00 2001 From: David Michelman Date: Mon, 8 Jul 2019 18:20:03 -0700 Subject: [PATCH 4/4] added a lot of error checking and logging --- source/code/plugin/in_kube_podinventory.rb | 29 ++++++++++++++++------ 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/source/code/plugin/in_kube_podinventory.rb b/source/code/plugin/in_kube_podinventory.rb index 4092814a5..08ce85606 100644 --- a/source/code/plugin/in_kube_podinventory.rb +++ b/source/code/plugin/in_kube_podinventory.rb @@ -320,17 +320,32 
@@ def parse_and_emit_records(podInventory, serviceList) # Record the last state of the container. This may have information on why a container was killed. begin - if container["lastState"].keys.length > 0 + if !container["lastState"].nil? && container["lastState"].keys.length == 1 lastStateName = container["lastState"].keys[0] - record["ContainerLastStatus"] = Hash.new - record["ContainerLastStatus"]["lastState"] = lastStateName # get the name of the last state (ex: terminated) - record["ContainerLastStatus"]["reason"] = container["lastState"][lastStateName]["reason"] # (ex: OOMKilled) - record["ContainerLastStatus"]["startedAt"] = container["lastState"][lastStateName]["startedAt"] # (ex: 2019-07-02T14:58:51Z) - record["ContainerLastStatus"]["finishedAt"] = container["lastState"][lastStateName]["finishedAt"] # (ex: 2019-07-02T14:58:52Z) + lastStateObject = container["lastState"][lastStateName] + if !lastStateObject.is_a?(Hash) + raise "expected a hash object. This could signify a bug or a kubernetes API change" + end + + if lastStateObject.key?("reason") && lastStateObject.key?("startedAt") && lastStateObject.key?("finishedAt") + newRecord = Hash.new + newRecord["lastState"] = lastStateName # get the name of the last state (ex: terminated) + newRecord["reason"] = lastStateObject["reason"] # (ex: OOMKilled) + newRecord["startedAt"] = lastStateObject["startedAt"] # (ex: 2019-07-02T14:58:51Z) + newRecord["finishedAt"] = lastStateObject["finishedAt"] # (ex: 2019-07-02T14:58:52Z) + + # only write to the output field if everything previously ran without error + record["ContainerLastStatus"] = newRecord + else + record["ContainerLastStatus"] = Hash.new + end else record["ContainerLastStatus"] = Hash.new end - rescue + rescue => errorStr + $log.warn "Failed in parse_and_emit_record pod inventory while processing ContainerLastStatus: #{errorStr}" + $log.debug_backtrace(errorStr.backtrace) + ApplicationInsightsUtility.sendExceptionTelemetry(errorStr) 
record["ContainerLastStatus"] = Hash.new end