From 619531ddd4ae0af42732bd9773c9cb186e3852b3 Mon Sep 17 00:00:00 2001 From: r-dilip Date: Mon, 4 Nov 2019 18:08:39 -0800 Subject: [PATCH] Fix the bug where if a warning condition appears before fail condition, the node condition is reported as warning instead of fail. Also fix the node conditions state to consider unknown as a failure state --- source/code/plugin/health/health_monitor_utils.rb | 14 +++++++++----- source/code/plugin/in_kube_health.rb | 12 ++++++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/source/code/plugin/health/health_monitor_utils.rb b/source/code/plugin/health/health_monitor_utils.rb index 0d297d215..2fa2d3a52 100644 --- a/source/code/plugin/health/health_monitor_utils.rb +++ b/source/code/plugin/health/health_monitor_utils.rb @@ -108,6 +108,8 @@ def get_pods_ready_hash(resources) def get_node_state_from_node_conditions(monitor_config, node_conditions) pass = false + warn = false + fail = false failtypes = ['outofdisk', 'networkunavailable'].to_set #default fail types if !monitor_config.nil? && !monitor_config["NodeConditionTypesForFailedState"].nil? failtypes = monitor_config["NodeConditionTypesForFailedState"] @@ -123,18 +125,20 @@ def get_node_state_from_node_conditions(monitor_config, node_conditions) #for each condition in the configuration, check if the type is not false. If yes, update state to fail if (failtypes.include?(type.downcase) && (status == 'True' || status == 'Unknown')) - return HealthMonitorStates::FAIL + fail = true elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown')) - return HealthMonitorStates::WARNING + warn = true elsif type == "Ready" && status == 'True' pass = true end end - if pass - return HealthMonitorStates::PASS - else + if fail return HealthMonitorStates::FAIL + elsif warn + return HealthMonitorStates::WARNING + else + return HealthMonitorStates::PASS end end diff --git a/source/code/plugin/in_kube_health.rb b/source/code/plugin/in_kube_health.rb index affbdd275..51ffa86d5 100644 --- a/source/code/plugin/in_kube_health.rb +++ b/source/code/plugin/in_kube_health.rb @@ -263,9 +263,17 @@ def process_node_condition_monitor(node_inventory) node_state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions) details = {} conditions.each do |condition| - condition_state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? HealthMonitorStates::PASS : HealthMonitorStates::FAIL + condition_state = HealthMonitorStates::PASS + if condition['type'].downcase != 'ready' + if (condition['status'].downcase == 'true' || condition['status'].downcase == 'unknown') + condition_state = HealthMonitorStates::FAIL + end + else #Condition == READY + if condition['status'].downcase != 'true' + condition_state = HealthMonitorStates::FAIL + end + end details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => condition_state} - #@@hmlog.info "Node Condition details: #{JSON.pretty_generate(details)}" end health_monitor_record = {"timestamp" => timestamp, "state" => node_state, "details" => details} monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name])