diff --git a/doc/admin-guide/files/strategies.yaml.en.rst b/doc/admin-guide/files/strategies.yaml.en.rst index 077d923b356..c715c9a0e45 100644 --- a/doc/admin-guide/files/strategies.yaml.en.rst +++ b/doc/admin-guide/files/strategies.yaml.en.rst @@ -183,6 +183,7 @@ Each **strategy** in the list may using the following parameters:: - **scheme** Indicates which scheme the strategy supports, *http* or *https* - **failover**: A map of **failover** information. - **max_simple_retries**: Part of the **failover** map and is an integer value of the maximum number of retries for a **simple retry** on the list of indicated response codes. **simple retry** is used to retry an upstream request using another upstream server if the response received on from the original upstream request matches any of the response codes configured for this strategy in the **failover** map. If no failover response codes are configured, no **simple retry** is attempted. + - **max_unavailable_retries Part of the **failover** map and is an integer value of the maximum number of retries for a **unavailable retry** on the list of indicated markdown response codes. **unavailable retry** is used to retry an upstream request using another upstream server if the response received on from the original upstream request matches any of the markdown response codes configured for this strategy in the **failover** map. If no failover markdown response codes are configured, no **unavailable retry** is attempted. **unavailable retry** differs from **simple retry** in that if a failover for retry is done, the previously retried server is marked down for rety. - **ring_mode**: Part of the **failover** map. The host ring selection mode. Use either **exhaust_ring** or **alternate_ring** @@ -190,6 +191,7 @@ Each **strategy** in the list may using the following parameters:: #. **alternate_ring**: retry hosts are selected from groups in an alternating group fashion. - **response_codes**: Part of the **failover** map. This is a list of **http** response codes that may be used for **simple retry**. + - **markdown_codes**: Part of the **failover** map. This is a list of **http** response codes that may be used for **unavailable retry** which will cause a parent markdown. - **health_check**: Part of the **failover** map. A list of health checks. **passive** is the default and means that the state machine marks down **hosts** when a transaction timeout or connection error is detected. **passive** is always used by the next hop strategies. **active** means that some external process may actively health check the hosts using the defined **health check url** and mark them down using **traffic_ctl**. @@ -211,6 +213,7 @@ Example: ring_mode: exhaust_ring response_codes: - 404 + markdown_codes: - 503 health_check: - passive @@ -226,6 +229,7 @@ Example: ring_mode: exhaust_ring response_codes: - 404 + markdown_codes: - 503 health_check: - passive diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 4dde9e7cc86..d9e30bbb59d 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -380,15 +380,7 @@ response_is_retryable(HttpTransact::State *s, HTTPStatus response_code) } const url_mapping *mp = s->url_map.getMapping(); if (mp && mp->strategy) { - if (mp->strategy->responseIsRetryable(s->current.simple_retry_attempts, response_code)) { - if (mp->strategy->onFailureMarkParentDown(response_code)) { - return PARENT_RETRY_UNAVAILABLE_SERVER; - } else { - return PARENT_RETRY_SIMPLE; - } - } else { - return PARENT_RETRY_NONE; - } + return mp->strategy->responseIsRetryable(s->state_machine->sm_id, s->current, response_code); } if (s->parent_params && !s->parent_result.response_is_retryable(response_code)) { diff --git a/proxy/http/remap/NextHopSelectionStrategy.cc b/proxy/http/remap/NextHopSelectionStrategy.cc index 9e076921212..8e8ef6c7201 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.cc +++ b/proxy/http/remap/NextHopSelectionStrategy.cc @@ -108,7 +108,11 @@ NextHopSelectionStrategy::Init(const YAML::Node &n) if (failover_node["max_simple_retries"]) { max_simple_retries = failover_node["max_simple_retries"].as(); } + if (failover_node["max_unavailable_retries"]) { + max_unavailable_retries = failover_node["max_unavailable_retries"].as(); + } + // response codes for simple retry. YAML::Node resp_codes_node; if (failover_node["response_codes"]) { resp_codes_node = failover_node["response_codes"]; @@ -127,6 +131,24 @@ NextHopSelectionStrategy::Init(const YAML::Node &n) resp_codes.sort(); } } + YAML::Node markdown_codes_node; + if (failover_node["markdown_codes"]) { + markdown_codes_node = failover_node["markdown_codes"]; + if (markdown_codes_node.Type() != YAML::NodeType::Sequence) { + NH_Error("Error in the markdown_codes definition for the strategy named '%s', skipping markdown_codes.", + strategy_name.c_str()); + } else { + for (auto &&k : markdown_codes_node) { + auto code = k.as(); + if (code > 300 && code < 599) { + markdown_codes.add(code); + } else { + NH_Note("Skipping invalid markdown response code '%d' for the strategy named '%s'.", code, strategy_name.c_str()); + } + } + markdown_codes.sort(); + } + } YAML::Node health_check_node; if (failover_node["health_check"]) { health_check_node = failover_node["health_check"]; @@ -227,17 +249,27 @@ NextHopSelectionStrategy::nextHopExists(TSHttpTxn txnp, void *ih) return false; } -bool -NextHopSelectionStrategy::responseIsRetryable(unsigned int current_retry_attempts, HTTPStatus response_code) -{ - return this->resp_codes.contains(response_code) && current_retry_attempts < this->max_simple_retries && - current_retry_attempts < this->num_parents; -} - -bool -NextHopSelectionStrategy::onFailureMarkParentDown(HTTPStatus response_code) +ParentRetry_t +NextHopSelectionStrategy::responseIsRetryable(int64_t sm_id, HttpTransact::CurrentInfo ¤t_info, HTTPStatus response_code) { - return static_cast(response_code) >= 500 && static_cast(response_code) <= 599; + unsigned sa = current_info.simple_retry_attempts; + unsigned ua = current_info.unavailable_server_retry_attempts; + + NH_Debug(NH_DEBUG_TAG, + "[%" PRIu64 "] response_code %d, simple_retry_attempts: %d max_simple_retries: %d, unavailable_server_retry_attempts: " + "%d, max_unavailable_retries: %d", + sm_id, response_code, sa, this->max_simple_retries, ua, max_unavailable_retries); + if (this->resp_codes.contains(response_code) && sa < this->max_simple_retries && sa < this->num_parents) { + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] response code %d is retryable, returning PARENT_RETRY_SIMPLE", sm_id, response_code); + return PARENT_RETRY_SIMPLE; + } + if (this->markdown_codes.contains(response_code) && ua < this->max_unavailable_retries && ua < this->num_parents) { + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] response code %d is retryable, returning PARENT_RETRY_UNAVAILABLE_SERVER", sm_id, + response_code); + return PARENT_RETRY_UNAVAILABLE_SERVER; + } + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] response code %d is not retryable, returning PARENT_RETRY_NONE", sm_id, response_code); + return PARENT_RETRY_NONE; } namespace YAML diff --git a/proxy/http/remap/NextHopSelectionStrategy.h b/proxy/http/remap/NextHopSelectionStrategy.h index 9228083613f..24f39f867d8 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.h +++ b/proxy/http/remap/NextHopSelectionStrategy.h @@ -25,6 +25,7 @@ #include "ts/parentselectdefs.h" #include "ParentSelection.h" +#include "HttpTransact.h" #ifndef _NH_UNIT_TESTS_ #define NH_Debug(tag, ...) Debug(tag, __VA_ARGS__) @@ -240,8 +241,7 @@ class NextHopSelectionStrategy const time_t now = 0); bool nextHopExists(TSHttpTxn txnp, void *ih = nullptr); - virtual bool responseIsRetryable(unsigned int current_retry_attempts, HTTPStatus response_code); - virtual bool onFailureMarkParentDown(HTTPStatus response_code); + virtual ParentRetry_t responseIsRetryable(int64_t sm_id, HttpTransact::CurrentInfo ¤t_info, HTTPStatus response_code); std::string strategy_name; bool go_direct = true; @@ -250,15 +250,17 @@ class NextHopSelectionStrategy NHPolicyType policy_type = NH_UNDEFINED; NHSchemeType scheme = NH_SCHEME_NONE; NHRingMode ring_mode = NH_ALTERNATE_RING; - ResponseCodes resp_codes; + ResponseCodes resp_codes; // simple retry codes + ResponseCodes markdown_codes; // unavailable server retry and markdown codes HealthChecks health_checks; NextHopHealthStatus passive_health; std::vector>> host_groups; - uint32_t max_simple_retries = 1; - uint32_t groups = 0; - uint32_t grp_index = 0; - uint32_t hst_index = 0; - uint32_t num_parents = 0; - uint32_t distance = 0; // index into the strategies list. - int max_retriers = 1; + uint32_t max_simple_retries = 1; + uint32_t max_unavailable_retries = 1; + uint32_t groups = 0; + uint32_t grp_index = 0; + uint32_t hst_index = 0; + uint32_t num_parents = 0; + uint32_t distance = 0; // index into the strategies list. + int max_retriers = 1; }; diff --git a/proxy/http/remap/unit-tests/combined.yaml b/proxy/http/remap/unit-tests/combined.yaml index de4d347a3c8..247708eb4a7 100644 --- a/proxy/http/remap/unit-tests/combined.yaml +++ b/proxy/http/remap/unit-tests/combined.yaml @@ -89,10 +89,15 @@ strategies: exhaust_ring # enumerated as exhaust_ring or alternate_ring #1) in 'exhaust_ring' mode all the servers in a ring are exhausted before failing over to secondary ring #2) in 'alternate_ring' mode causes the failover to another server in secondary ring. - response_codes: # defines the responses codes for failover in exhaust_ring mode + response_codes: # defines the responses codes for simple retry failover in - 404 + - 402 + - 403 + markdown_codes: # defines the response codes for unavailble server retry + - 405 - 502 - 503 + health_check: # specifies the list of healthchecks that should be considered for failover. A list of enums: 'passive' or 'active' - passive - active diff --git a/proxy/http/remap/unit-tests/test_NextHopStrategyFactory.cc b/proxy/http/remap/unit-tests/test_NextHopStrategyFactory.cc index de9a71f8aa8..30beadfcbd3 100644 --- a/proxy/http/remap/unit-tests/test_NextHopStrategyFactory.cc +++ b/proxy/http/remap/unit-tests/test_NextHopStrategyFactory.cc @@ -386,7 +386,7 @@ SCENARIO("factory tests loading yaml configs", "[loadConfig]") CHECK(strategy->ring_mode == NH_EXHAUST_RING); CHECK(strategy->groups == 2); CHECK(strategy->resp_codes.contains(404)); - CHECK(strategy->resp_codes.contains(502)); + CHECK(strategy->resp_codes.contains(402)); CHECK(!strategy->resp_codes.contains(604)); CHECK(strategy->health_checks.active == true); CHECK(strategy->health_checks.passive == true); @@ -446,7 +446,7 @@ SCENARIO("factory tests loading yaml configs", "[loadConfig]") } } CHECK(strategy->resp_codes.contains(404)); - CHECK(strategy->resp_codes.contains(503)); + CHECK(strategy->resp_codes.contains(403)); CHECK(!strategy->resp_codes.contains(604)); } } @@ -870,6 +870,9 @@ SCENARIO("factory tests loading yaml configs from a directory", "[loadConfig]") CHECK(strategy->resp_codes.contains(404)); CHECK(strategy->resp_codes.contains(503)); CHECK(!strategy->resp_codes.contains(604)); + CHECK(!strategy->markdown_codes.contains(405)); + CHECK(!strategy->markdown_codes.contains(502)); + CHECK(!strategy->markdown_codes.contains(503)); } }