Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions source/code/plugin/health/health_monitor_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def get_pods_ready_hash(resources)

def get_node_state_from_node_conditions(monitor_config, node_conditions)
pass = false
warn = false
fail = false
failtypes = ['outofdisk', 'networkunavailable'].to_set #default fail types
if !monitor_config.nil? && !monitor_config["NodeConditionTypesForFailedState"].nil?
failtypes = monitor_config["NodeConditionTypesForFailedState"]
Expand All @@ -123,18 +125,20 @@ def get_node_state_from_node_conditions(monitor_config, node_conditions)

#for each condition type configured as a fail type, check if its status is not 'False' (i.e. 'True' or 'Unknown'). If so, update state to fail
if (failtypes.include?(type.downcase) && (status == 'True' || status == 'Unknown'))
return HealthMonitorStates::FAIL
fail = true
elsif ((type == "DiskPressure" || type == "MemoryPressure" || type == "PIDPressure") && (status == 'True' || status == 'Unknown'))
return HealthMonitorStates::WARNING
warn = true
elsif type == "Ready" && status == 'True'
pass = true
end
end

if pass
return HealthMonitorStates::PASS
else
if fail
return HealthMonitorStates::FAIL
elsif warn
return HealthMonitorStates::WARNING
else
return HealthMonitorStates::PASS
end
end

Expand Down
12 changes: 10 additions & 2 deletions source/code/plugin/in_kube_health.rb
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,17 @@ def process_node_condition_monitor(node_inventory)
node_state = HealthMonitorUtils.get_node_state_from_node_conditions(monitor_config, conditions)
details = {}
conditions.each do |condition|
condition_state = !(condition['status'].downcase == 'true' && condition['type'].downcase != 'ready') ? HealthMonitorStates::PASS : HealthMonitorStates::FAIL
condition_state = HealthMonitorStates::PASS
if condition['type'].downcase != 'ready'
if (condition['status'].downcase == 'true' || condition['status'].downcase == 'unknown')
condition_state = HealthMonitorStates::FAIL
end
else #Condition == READY
if condition['status'].downcase != 'true'
condition_state = HealthMonitorStates::FAIL
end
end
details[condition['type']] = {"Reason" => condition['reason'], "Message" => condition['message'], "State" => condition_state}
#@@hmlog.info "Node Condition details: #{JSON.pretty_generate(details)}"
end
health_monitor_record = {"timestamp" => timestamp, "state" => node_state, "details" => details}
monitor_instance_id = HealthMonitorUtils.get_monitor_instance_id(monitor_id, [@@cluster_id, node_name])
Expand Down