diff --git a/include/ts/nexthop.h b/include/ts/nexthop.h new file mode 100644 index 00000000000..ed623805649 --- /dev/null +++ b/include/ts/nexthop.h @@ -0,0 +1,49 @@ +/** @file + + Traffic Server SDK API header file + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + @section developers Developers + + NextHop plugin interface. + + */ + +#pragma once + +#include + +// plugin callback commands. +enum NHCmd { NH_MARK_UP, NH_MARK_DOWN }; + +struct NHHealthStatus { + virtual bool isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih = nullptr) = 0; + virtual void markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, + const time_t now = 0) = 0; + virtual ~NHHealthStatus() {} +}; + +struct NHPluginStrategy { + virtual void findNextHop(TSHttpTxn txnp, void *ih = nullptr) = 0; + virtual bool nextHopExists(TSHttpTxn txnp, void *ih = nullptr) = 0; + virtual ~NHPluginStrategy() {} + + NHHealthStatus *healthStatus; +}; diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 727121e7e20..4ec89d4a1f1 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -21,6 +21,7 @@ limitations under the License. */ +#include "ts/nexthop.h" #include "tscore/ink_platform.h" #include @@ -122,39 +123,26 @@ is_api_result(HttpTransact::State *s) return r; } -// wrapper to choose between a remap next hop strategy or use parent.config -// remap next hop strategy is preferred +// wrapper to get the max_retries. +// Does NOT check the strategy; if strategy exists, strategy->responseIsRetryable should be called instead. inline static unsigned max_retries(HttpTransact::State *s, ParentRetry_t method) { - unsigned int r = 0; - url_mapping *mp = s->url_map.getMapping(); - - if (mp && mp->strategy) { - // remap strategies does not support unavailable_server_responses - if (method == PARENT_RETRY_SIMPLE) { - r = mp->strategy->max_simple_retries; - } - } else if (s->parent_params) { - r = s->parent_result.max_retries(method); + if (s->parent_params) { + return s->parent_result.max_retries(method); } - return r; + return 0; } -// wrapper to choose between a remap next hop strategy or use parent.config -// remap next hop strategy is preferred +// wrapper to get the numParents. +// Does NOT check the strategy; if strategy exists, strategy->responseIsRetryable should be called instead. inline static uint32_t numParents(HttpTransact::State *s) { - uint32_t r = 0; - url_mapping *mp = s->url_map.getMapping(); - - if (mp && mp->strategy) { - r = mp->strategy->num_parents; - } else if (s->parent_params) { - r = s->parent_params->numParents(&s->parent_result); + if (s->parent_params) { + return s->parent_params->numParents(&s->parent_result); } - return r; + return 0; } // wrapper to choose between a remap next hop strategy or use parent.config @@ -173,40 +161,15 @@ parent_is_proxy(HttpTransact::State *s) return r; } -// wrapper to choose between a remap next hop strategy or use parent.config -// remap next hop strategy is preferred -inline static bool -response_is_retryable(HttpTransact::State *s, HTTPStatus response_code) -{ - bool r = false; - url_mapping *mp = s->url_map.getMapping(); - - if (mp && mp->strategy) { - if (mp->strategy->resp_codes.codes.size() > 0) { - r = mp->strategy->resp_codes.contains(response_code); - } - } else if (s->parent_params) { - r = s->parent_result.response_is_retryable(response_code); - } - return r; -} - -// wrapper to choose between a remap next hop strategy or use parent.config -// remap next hop strategy is preferred +// wrapper to get the parent.config retry type. +// Does NOT check the strategy; if strategy exists, strategy->responseIsRetryable should be called instead. inline static unsigned retry_type(HttpTransact::State *s) { - unsigned r = PARENT_RETRY_NONE; - url_mapping *mp = s->url_map.getMapping(); - - if (mp && mp->strategy) { - if (mp->strategy->resp_codes.codes.size() > 0) { - r = PARENT_RETRY_SIMPLE; - } - } else if (s->parent_params) { - r = s->parent_result.retry_type(); + if (s->parent_params) { + return s->parent_result.retry_type(); } - return r; + return PARENT_RETRY_NONE; } // wrapper to choose between a remap next hop strategy or use parent.config @@ -217,8 +180,7 @@ findParent(HttpTransact::State *s) url_mapping *mp = s->url_map.getMapping(); if (mp && mp->strategy) { - return mp->strategy->findNextHop(s->state_machine->sm_id, s->parent_result, s->request_data, s->txn_conf->parent_fail_threshold, - s->txn_conf->parent_retry_time); + return mp->strategy->findNextHop(reinterpret_cast(s->state_machine)); } else if (s->parent_params) { return s->parent_params->findParent(&s->request_data, &s->parent_result, s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time); @@ -230,11 +192,12 @@ findParent(HttpTransact::State *s) inline static void markParentDown(HttpTransact::State *s) { + HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count); url_mapping *mp = s->url_map.getMapping(); if (mp && mp->strategy) { - return mp->strategy->markNextHopDown(s->state_machine->sm_id, s->parent_result, s->txn_conf->parent_fail_threshold, - s->txn_conf->parent_retry_time); + return mp->strategy->markNextHop(reinterpret_cast(s->state_machine), s->parent_result.hostname, + s->parent_result.port, NH_MARK_DOWN); } else if (s->parent_params) { return s->parent_params->markParentDown(&s->parent_result, s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time); } @@ -247,7 +210,8 @@ markParentUp(HttpTransact::State *s) { url_mapping *mp = s->url_map.getMapping(); if (mp && mp->strategy) { - return mp->strategy->markNextHopUp(s->state_machine->sm_id, s->parent_result); + return mp->strategy->markNextHop(reinterpret_cast(s->state_machine), s->parent_result.hostname, + s->parent_result.port, NH_MARK_UP); } else if (s->parent_params) { return s->parent_params->markParentUp(&s->parent_result); } @@ -260,7 +224,7 @@ parentExists(HttpTransact::State *s) { url_mapping *mp = s->url_map.getMapping(); if (mp && mp->strategy) { - return mp->strategy->nextHopExists(s->state_machine->sm_id); + return mp->strategy->nextHopExists(reinterpret_cast(s->state_machine)); } else if (s->parent_params) { return s->parent_params->parentExists(&s->request_data); } @@ -275,8 +239,7 @@ nextParent(HttpTransact::State *s) url_mapping *mp = s->url_map.getMapping(); if (mp && mp->strategy) { // NextHop only has a findNextHop() function. - return mp->strategy->findNextHop(s->state_machine->sm_id, s->parent_result, s->request_data, s->txn_conf->parent_fail_threshold, - s->txn_conf->parent_retry_time); + return mp->strategy->findNextHop(reinterpret_cast(s->state_machine)); } else if (s->parent_params) { return s->parent_params->nextParent(&s->request_data, &s->parent_result, s->txn_conf->parent_fail_threshold, s->txn_conf->parent_retry_time); @@ -310,40 +273,128 @@ is_response_unavailable_code(HTTPStatus response_code) return true; } -inline static void -simple_or_unavailable_server_retry(HttpTransact::State *s) +bool +HttpTransact::is_response_valid(State *s, HTTPHdr *incoming_response) { - // server response. - HTTPStatus server_response = http_hdr_status_get(s->hdr_info.server_response.m_http); + if (s->current.state != CONNECTION_ALIVE) { + ink_assert((s->current.state == CONNECTION_ERROR) || (s->current.state == OPEN_RAW_ERROR) || + (s->current.state == PARSE_ERROR) || (s->current.state == CONNECTION_CLOSED) || + (s->current.state == INACTIVE_TIMEOUT) || (s->current.state == ACTIVE_TIMEOUT) || + s->current.state == OUTBOUND_CONGESTION); - TxnDebug("http_trans", "[simple_or_unavailabe_server_retry] server_response = %d, simple_retry_attempts: %d, numParents:%d ", - server_response, s->current.simple_retry_attempts, numParents(s)); - // simple retry is enabled, 0x1 - if ((retry_type(s) & PARENT_RETRY_SIMPLE) && is_response_simple_code(server_response) && - s->current.simple_retry_attempts < max_retries(s, PARENT_RETRY_SIMPLE) && response_is_retryable(s, server_response)) { - TxnDebug("parent_select", "RECEIVED A SIMPLE RETRY RESPONSE"); - if (s->current.simple_retry_attempts < numParents(s)) { - s->current.state = HttpTransact::PARENT_RETRY; - s->current.retry_type = PARENT_RETRY_SIMPLE; - return; + s->hdr_info.response_error = CONNECTION_OPEN_FAILED; + return false; + } + + s->hdr_info.response_error = check_response_validity(s, incoming_response); + + switch (s->hdr_info.response_error) { +#ifdef REALLY_NEED_TO_CHECK_DATE_VALIDITY + case BOGUS_OR_NO_DATE_IN_RESPONSE: + // We could modify the response to add the date, if need be. + // incoming_response->set_date(s->request_sent_time); + return true; +#endif + case NO_RESPONSE_HEADER_ERROR: + TxnDebug("http_trans", "[is_response_valid] No errors in response"); + return true; + + case MISSING_REASON_PHRASE: + TxnDebug("http_trans", "[is_response_valid] Response Error: Missing reason phrase - allowing"); + return true; + + case STATUS_CODE_SERVER_ERROR: + TxnDebug("http_trans", "[is_response_valid] Response Error: Origin Server returned 500 - allowing"); + return true; + + case CONNECTION_OPEN_FAILED: + TxnDebug("http_trans", "[is_response_valid] Response Error: connection open failed"); + s->current.state = CONNECTION_ERROR; + return false; + + case NON_EXISTANT_RESPONSE_HEADER: + TxnDebug("http_trans", "[is_response_valid] Response Error: No response header"); + s->current.state = BAD_INCOMING_RESPONSE; + return false; + + case NOT_A_RESPONSE_HEADER: + TxnDebug("http_trans", "[is_response_valid] Response Error: Not a response header"); + s->current.state = BAD_INCOMING_RESPONSE; + return false; + + case MISSING_STATUS_CODE: + TxnDebug("http_trans", "[is_response_valid] Response Error: Missing status code"); + s->current.state = BAD_INCOMING_RESPONSE; + return false; + + default: + TxnDebug("http_trans", "[is_response_valid] Errors in response"); + s->current.state = BAD_INCOMING_RESPONSE; + return false; + } +} + +inline static ParentRetry_t +response_is_retryable(HttpTransact::State *s, HTTPStatus response_code) +{ + if (!HttpTransact::is_response_valid(s, &s->hdr_info.server_response) || s->current.request_to != HttpTransact::PARENT_PROXY) { + return PARENT_RETRY_NONE; + } + + const url_mapping *mp = s->url_map.getMapping(); + if (mp && mp->strategy) { + if (mp->strategy->responseIsRetryable(s->current.simple_retry_attempts, response_code)) { + if (mp->strategy->onFailureMarkParentDown(response_code)) { + return PARENT_RETRY_UNAVAILABLE_SERVER; + } else { + return PARENT_RETRY_SIMPLE; + } } else { - TxnDebug("http_trans", "PARENT_RETRY_SIMPLE: retried all parents, send response to client."); - return; + return PARENT_RETRY_NONE; } } - // unavailable server retry is enabled 0x2 - else if ((retry_type(s) & PARENT_RETRY_UNAVAILABLE_SERVER) && is_response_unavailable_code(server_response) && - s->current.unavailable_server_retry_attempts < max_retries(s, PARENT_RETRY_UNAVAILABLE_SERVER) && - response_is_retryable(s, server_response)) { - TxnDebug("parent_select", "RECEIVED A PARENT_RETRY_UNAVAILABLE_SERVER RESPONSE"); + + if (s->parent_params && !s->parent_result.response_is_retryable(response_code)) { + return PARENT_RETRY_NONE; + } + + const unsigned int s_retry_type = retry_type(s); + const HTTPStatus server_response = http_hdr_status_get(s->hdr_info.server_response.m_http); + if ((s_retry_type & PARENT_RETRY_SIMPLE) && is_response_simple_code(server_response) && + s->current.simple_retry_attempts < max_retries(s, PARENT_RETRY_SIMPLE)) { + if (s->current.simple_retry_attempts < numParents(s)) { + return PARENT_RETRY_SIMPLE; + } + return PARENT_RETRY_NONE; + } + if ((s_retry_type & PARENT_RETRY_UNAVAILABLE_SERVER) && is_response_unavailable_code(server_response) && + s->current.unavailable_server_retry_attempts < max_retries(s, PARENT_RETRY_UNAVAILABLE_SERVER)) { if (s->current.unavailable_server_retry_attempts < numParents(s)) { - s->current.state = HttpTransact::PARENT_RETRY; - s->current.retry_type = PARENT_RETRY_UNAVAILABLE_SERVER; - return; - } else { - TxnDebug("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: retried all parents, send error to client."); - return; + return PARENT_RETRY_UNAVAILABLE_SERVER; } + return PARENT_RETRY_NONE; + } + return PARENT_RETRY_NONE; +} + +inline static void +simple_or_unavailable_server_retry(HttpTransact::State *s) +{ + HTTPStatus server_response = http_hdr_status_get(s->hdr_info.server_response.m_http); + switch (response_is_retryable(s, server_response)) { + case PARENT_RETRY_SIMPLE: + s->current.state = HttpTransact::PARENT_RETRY; + s->current.retry_type = PARENT_RETRY_SIMPLE; + break; + case PARENT_RETRY_UNAVAILABLE_SERVER: + s->current.state = HttpTransact::PARENT_RETRY; + s->current.retry_type = PARENT_RETRY_UNAVAILABLE_SERVER; + break; + case PARENT_RETRY_BOTH: + ink_assert(!"response_is_retryable should return an exact retry type, never both"); + break; + case PARENT_RETRY_NONE: + break; // no retry } } @@ -1656,7 +1707,6 @@ HttpTransact::PPDNSLookup(State *s) ink_assert(s->dns_info.looking_up == PARENT_PROXY); if (!s->dns_info.lookup_success) { // Mark parent as down due to resolving failure - HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count); markParentDown(s); // DNS lookup of parent failed, find next parent or o.s. if (find_server_and_update_current_info(s) == HttpTransact::HOST_NONE) { @@ -3404,7 +3454,7 @@ HttpTransact::HandleResponse(State *s) ink_release_assert(s->cache_info.action != CACHE_PREPARE_TO_WRITE); } - if (!is_response_valid(s, &s->hdr_info.server_response)) { + if (!HttpTransact::is_response_valid(s, &s->hdr_info.server_response)) { TxnDebug("http_seq", "[HttpTransact::HandleResponse] Response not valid"); } else { TxnDebug("http_seq", "[HttpTransact::HandleResponse] Response valid"); @@ -3543,13 +3593,7 @@ HttpTransact::handle_response_from_parent(State *s) TxnDebug("http_trans", "[handle_response_from_parent] (hrfp)"); HTTP_RELEASE_ASSERT(s->current.server == &s->parent_info); - // response is from a parent origin server. - if (is_response_valid(s, &s->hdr_info.server_response) && s->current.request_to == HttpTransact::PARENT_PROXY) { - // check for a retryable response if simple or unavailable server retry are enabled. - if (retry_type(s) & (PARENT_RETRY_SIMPLE | PARENT_RETRY_UNAVAILABLE_SERVER)) { - simple_or_unavailable_server_retry(s); - } - } + simple_or_unavailable_server_retry(s); s->parent_info.state = s->current.state; switch (s->current.state) { @@ -3564,28 +3608,13 @@ HttpTransact::handle_response_from_parent(State *s) break; case PARENT_RETRY: if (s->current.retry_type == PARENT_RETRY_SIMPLE) { - if (s->current.simple_retry_attempts >= max_retries(s, PARENT_RETRY_SIMPLE)) { - TxnDebug("http_trans", "PARENT_RETRY_SIMPLE: retried all parents, send error to client."); - s->current.retry_type = PARENT_RETRY_NONE; - } else { - s->current.simple_retry_attempts++; - TxnDebug("http_trans", "PARENT_RETRY_SIMPLE: try another parent."); - s->current.retry_type = PARENT_RETRY_NONE; - next_lookup = find_server_and_update_current_info(s); - } - } else if (s->current.retry_type == PARENT_RETRY_UNAVAILABLE_SERVER) { - if (s->current.unavailable_server_retry_attempts >= max_retries(s, PARENT_RETRY_UNAVAILABLE_SERVER)) { - TxnDebug("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: retried all parents, send error to client."); - s->current.retry_type = PARENT_RETRY_NONE; - } else { - s->current.unavailable_server_retry_attempts++; - TxnDebug("http_trans", "PARENT_RETRY_UNAVAILABLE_SERVER: marking parent down and trying another."); - s->current.retry_type = PARENT_RETRY_NONE; - HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count); - markParentDown(s); - next_lookup = find_server_and_update_current_info(s); - } + s->current.simple_retry_attempts++; + } else { + markParentDown(s); + s->current.unavailable_server_retry_attempts++; } + next_lookup = find_server_and_update_current_info(s); + s->current.retry_type = PARENT_RETRY_NONE; break; default: TxnDebug("http_trans", "[hrfp] connection not alive"); @@ -3607,7 +3636,6 @@ HttpTransact::handle_response_from_parent(State *s) // If the request is not retryable, just give up! if (!is_request_retryable(s)) { if (s->current.state != OUTBOUND_CONGESTION) { - HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count); markParentDown(s); } s->parent_result.result = PARENT_FAIL; @@ -3635,7 +3663,6 @@ HttpTransact::handle_response_from_parent(State *s) // to the parent otherwise slow origin servers cause // us to mark the parent down if (s->current.state == CONNECTION_ERROR) { - HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count); markParentDown(s); } // We are done so look for another parent if any @@ -3647,7 +3674,6 @@ HttpTransact::handle_response_from_parent(State *s) HTTP_INCREMENT_DYN_STAT(http_total_parent_retries_exhausted_stat); TxnDebug("http_trans", "[handle_response_from_parent] Error. No more retries."); if (s->current.state == CONNECTION_ERROR) { - HTTP_INCREMENT_DYN_STAT(http_total_parent_marked_down_count); markParentDown(s); } s->parent_result.result = PARENT_FAIL; @@ -6493,67 +6519,6 @@ HttpTransact::is_request_retryable(State *s) return true; } -bool -HttpTransact::is_response_valid(State *s, HTTPHdr *incoming_response) -{ - if (s->current.state != CONNECTION_ALIVE) { - ink_assert((s->current.state == CONNECTION_ERROR) || (s->current.state == OPEN_RAW_ERROR) || - (s->current.state == PARSE_ERROR) || (s->current.state == CONNECTION_CLOSED) || - (s->current.state == INACTIVE_TIMEOUT) || (s->current.state == ACTIVE_TIMEOUT) || - s->current.state == OUTBOUND_CONGESTION); - - s->hdr_info.response_error = CONNECTION_OPEN_FAILED; - return false; - } - - s->hdr_info.response_error = check_response_validity(s, incoming_response); - - switch (s->hdr_info.response_error) { -#ifdef REALLY_NEED_TO_CHECK_DATE_VALIDITY - case BOGUS_OR_NO_DATE_IN_RESPONSE: - // We could modify the response to add the date, if need be. - // incoming_response->set_date(s->request_sent_time); - return true; -#endif - case NO_RESPONSE_HEADER_ERROR: - TxnDebug("http_trans", "[is_response_valid] No errors in response"); - return true; - - case MISSING_REASON_PHRASE: - TxnDebug("http_trans", "[is_response_valid] Response Error: Missing reason phrase - allowing"); - return true; - - case STATUS_CODE_SERVER_ERROR: - TxnDebug("http_trans", "[is_response_valid] Response Error: Origin Server returned 500 - allowing"); - return true; - - case CONNECTION_OPEN_FAILED: - TxnDebug("http_trans", "[is_response_valid] Response Error: connection open failed"); - s->current.state = CONNECTION_ERROR; - return false; - - case NON_EXISTANT_RESPONSE_HEADER: - TxnDebug("http_trans", "[is_response_valid] Response Error: No response header"); - s->current.state = BAD_INCOMING_RESPONSE; - return false; - - case NOT_A_RESPONSE_HEADER: - TxnDebug("http_trans", "[is_response_valid] Response Error: Not a response header"); - s->current.state = BAD_INCOMING_RESPONSE; - return false; - - case MISSING_STATUS_CODE: - TxnDebug("http_trans", "[is_response_valid] Response Error: Missing status code"); - s->current.state = BAD_INCOMING_RESPONSE; - return false; - - default: - TxnDebug("http_trans", "[is_response_valid] Errors in response"); - s->current.state = BAD_INCOMING_RESPONSE; - return false; - } -} - void HttpTransact::process_quick_http_filter(State *s, int method) { diff --git a/proxy/http/remap/Makefile.am b/proxy/http/remap/Makefile.am index af15a57556a..97e3e953758 100644 --- a/proxy/http/remap/Makefile.am +++ b/proxy/http/remap/Makefile.am @@ -41,6 +41,7 @@ libhttp_remap_a_SOURCES = \ NextHopSelectionStrategy.cc \ NextHopConsistentHash.h \ NextHopConsistentHash.cc \ + NextHopHealthStatus.cc \ NextHopRoundRobin.h \ NextHopRoundRobin.cc \ NextHopStrategyFactory.h \ @@ -148,6 +149,7 @@ test_NextHopStrategyFactory_SOURCES = \ NextHopStrategyFactory.cc \ NextHopRoundRobin.cc \ NextHopConsistentHash.cc \ + NextHopHealthStatus.cc \ unit-tests/test_NextHopStrategyFactory.cc \ unit-tests/nexthop_test_stubs.cc @@ -178,6 +180,7 @@ test_NextHopRoundRobin_SOURCES = \ NextHopStrategyFactory.cc \ NextHopRoundRobin.cc \ NextHopConsistentHash.cc \ + NextHopHealthStatus.cc \ unit-tests/test_NextHopRoundRobin.cc \ unit-tests/nexthop_test_stubs.cc @@ -207,6 +210,7 @@ test_NextHopConsistentHash_SOURCES = \ NextHopSelectionStrategy.cc \ NextHopStrategyFactory.cc \ NextHopConsistentHash.cc \ + NextHopHealthStatus.cc \ NextHopRoundRobin.cc \ unit-tests/test_NextHopConsistentHash.cc \ unit-tests/nexthop_test_stubs.cc diff --git a/proxy/http/remap/NextHopConsistentHash.cc b/proxy/http/remap/NextHopConsistentHash.cc index f36605cab54..4951fe08464 100644 --- a/proxy/http/remap/NextHopConsistentHash.cc +++ b/proxy/http/remap/NextHopConsistentHash.cc @@ -24,6 +24,7 @@ #include #include "tscore/HashSip.h" +#include "HttpSM.h" #include "NextHopConsistentHash.h" // hash_key strings. @@ -207,81 +208,84 @@ NextHopConsistentHash::getHashKey(uint64_t sm_id, HttpRequestData *hrdata, ATSHa } void -NextHopConsistentHash::findNextHop(const uint64_t sm_id, ParentResult &result, RequestData &rdata, const uint64_t fail_threshold, - const uint64_t retry_time, time_t now) +NextHopConsistentHash::findNextHop(TSHttpTxn txnp, void *ih, time_t now) { - time_t _now = now; - bool firstcall = false; - bool nextHopRetry = false; - bool wrapped = false; + HttpSM *sm = reinterpret_cast(txnp); + ParentResult *result = &sm->t_state.parent_result; + HttpRequestData request_info = sm->t_state.request_data; + int64_t sm_id = sm->sm_id; + int64_t retry_time = sm->t_state.txn_conf->parent_retry_time; + time_t _now = now; + bool firstcall = false; + bool nextHopRetry = false; + bool wrapped = false; std::vector wrap_around(groups, false); uint32_t cur_ring = 0; // there is a hash ring for each host group uint64_t hash_key = 0; uint32_t lookups = 0; ATSHash64Sip24 hash; - HttpRequestData *request_info = static_cast(&rdata); HostRecord *hostRec = nullptr; std::shared_ptr pRec = nullptr; HostStatus &pStatus = HostStatus::instance(); HostStatus_t host_stat = HostStatus_t::HOST_STATUS_INIT; HostStatRec *hst = nullptr; - if (result.line_number == -1 && result.result == PARENT_UNDEFINED) { + if (result->line_number == -1 && result->result == PARENT_UNDEFINED) { firstcall = true; } if (firstcall) { - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] firstcall, line_number: %d, result: %s", sm_id, result.line_number, - ParentResultStr[result.result]); - result.line_number = distance; - cur_ring = 0; + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] firstcall, line_number: %d, result: %s", sm_id, result->line_number, + ParentResultStr[result->result]); + result->line_number = distance; + cur_ring = 0; for (uint32_t i = 0; i < groups; i++) { - result.chash_init[i] = false; - wrap_around[i] = false; + result->chash_init[i] = false; + wrap_around[i] = false; } } else { - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] not firstcall, line_number: %d, result: %s", sm_id, result.line_number, - ParentResultStr[result.result]); + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] not firstcall, line_number: %d, result: %s", sm_id, result->line_number, + ParentResultStr[result->result]); switch (ring_mode) { case NH_ALTERNATE_RING: if (groups > 1) { - cur_ring = (result.last_group + 1) % groups; + cur_ring = (result->last_group + 1) % groups; } else { - cur_ring = result.last_group; + cur_ring = result->last_group; } break; case NH_EXHAUST_RING: default: if (!wrapped) { - cur_ring = result.last_group; + cur_ring = result->last_group; } else if (groups > 1) { - cur_ring = (result.last_group + 1) % groups; + cur_ring = (result->last_group + 1) % groups; } break; } } // Do the initial parent look-up. - hash_key = getHashKey(sm_id, request_info, &hash); + hash_key = getHashKey(sm_id, &request_info, &hash); do { // search until we've selected a different parent if !firstcall std::shared_ptr r = rings[cur_ring]; - hostRec = chash_lookup(r, hash_key, &result.chashIter[cur_ring], &wrapped, &hash, &result.chash_init[cur_ring], - &result.mapWrapped[cur_ring], sm_id); + hostRec = chash_lookup(r, hash_key, &result->chashIter[cur_ring], &wrapped, &hash, &result->chash_init[cur_ring], + &result->mapWrapped[cur_ring], sm_id); wrap_around[cur_ring] = wrapped; lookups++; // the 'available' flag is maintained in 'host_groups' and not the hash ring. if (hostRec) { pRec = host_groups[hostRec->group_index][hostRec->host_index]; if (firstcall) { - hst = (pRec) ? pStatus.getHostStatus(pRec->hostname.c_str()) : nullptr; - result.first_choice_status = (hst) ? hst->status : HostStatus_t::HOST_STATUS_UP; + hst = (pRec) ? pStatus.getHostStatus(pRec->hostname.c_str()) : nullptr; + result->first_choice_status = (hst) ? hst->status : HostStatus_t::HOST_STATUS_UP; break; } } else { pRec = nullptr; } - } while (pRec && result.hostname && strcmp(pRec->hostname.c_str(), result.hostname) == 0); + } while (pRec && result->hostname && strcmp(pRec->hostname.c_str(), result->hostname) == 0); NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] Initial parent lookups: %d", sm_id, lookups); @@ -305,11 +309,11 @@ NextHopConsistentHash::findNextHop(const uint64_t sm_id, ParentResult &result, R _now == 0 ? _now = time(nullptr) : _now = now; // check if the host is retryable. It's retryable if the retry window has elapsed if ((pRec->failedAt + retry_time) < static_cast(_now)) { - nextHopRetry = true; - result.last_parent = pRec->host_index; - result.last_lookup = pRec->group_index; - result.retry = nextHopRetry; - result.result = PARENT_SPECIFIED; + nextHopRetry = true; + result->last_parent = pRec->host_index; + result->last_lookup = pRec->group_index; + result->retry = nextHopRetry; + result->result = PARENT_SPECIFIED; NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next hop %s is now retryable, marked it available.", sm_id, pRec->hostname.c_str()); break; } @@ -328,8 +332,8 @@ NextHopConsistentHash::findNextHop(const uint64_t sm_id, ParentResult &result, R break; } std::shared_ptr r = rings[cur_ring]; - hostRec = chash_lookup(r, hash_key, &result.chashIter[cur_ring], &wrapped, &hash, &result.chash_init[cur_ring], - &result.mapWrapped[cur_ring], sm_id); + hostRec = chash_lookup(r, hash_key, &result->chashIter[cur_ring], &wrapped, &hash, &result->chash_init[cur_ring], + &result->mapWrapped[cur_ring], sm_id); wrap_around[cur_ring] = wrapped; lookups++; if (hostRec) { @@ -372,37 +376,36 @@ NextHopConsistentHash::findNextHop(const uint64_t sm_id, ParentResult &result, R // Validate and return the final result. // ---------------------------------------------------------------------------------------------------- - // use the available or marked for retry parent. - hst = (pRec) ? pStatus.getHostStatus(pRec->hostname.c_str()) : nullptr; - host_stat = (hst) ? hst->status : HostStatus_t::HOST_STATUS_UP; - - if (pRec && host_stat == HOST_STATUS_UP && (pRec->available || result.retry)) { - result.result = PARENT_SPECIFIED; - result.hostname = pRec->hostname.c_str(); - result.last_parent = pRec->host_index; - result.last_lookup = result.last_group = cur_ring; + if (pRec && host_stat == HOST_STATUS_UP && (pRec->available || result->retry)) { + result->result = PARENT_SPECIFIED; + result->hostname = pRec->hostname.c_str(); + result->last_parent = pRec->host_index; + result->last_lookup = result->last_group = cur_ring; switch (scheme) { case NH_SCHEME_NONE: case NH_SCHEME_HTTP: - result.port = pRec->getPort(scheme); + result->port = pRec->getPort(scheme); break; case NH_SCHEME_HTTPS: - result.port = pRec->getPort(scheme); + result->port = pRec->getPort(scheme); break; } - result.retry = nextHopRetry; - ink_assert(result.hostname != nullptr); - ink_assert(result.port != 0); - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] Chosen parent: %s.%d", sm_id, result.hostname, result.port); + result->retry = nextHopRetry; + ink_assert(result->hostname != nullptr); + ink_assert(result->port != 0); + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] result->result: %s Chosen parent: %s.%d", sm_id, ParentResultStr[result->result], + result->hostname, result->port); } else { if (go_direct == true) { - result.result = PARENT_DIRECT; + result->result = PARENT_DIRECT; } else { - result.result = PARENT_FAIL; + result->result = PARENT_FAIL; } - result.hostname = nullptr; - result.port = 0; - result.retry = false; + result->hostname = nullptr; + result->port = 0; + result->retry = false; + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] result->result: %s set hostname null port 0 retry false", sm_id, + ParentResultStr[result->result]); } return; diff --git a/proxy/http/remap/NextHopConsistentHash.h b/proxy/http/remap/NextHopConsistentHash.h index 47fcf9e1aae..6ce8cffb877 100644 --- a/proxy/http/remap/NextHopConsistentHash.h +++ b/proxy/http/remap/NextHopConsistentHash.h @@ -49,6 +49,5 @@ class NextHopConsistentHash : public NextHopSelectionStrategy NextHopConsistentHash(const std::string_view name, const NHPolicyType &policy) : NextHopSelectionStrategy(name, policy) {} ~NextHopConsistentHash(); bool Init(const YAML::Node &n); - void findNextHop(const uint64_t sm_id, ParentResult &result, RequestData &rdata, const uint64_t fail_threshold, - const uint64_t retry_time, time_t now = 0) override; + void findNextHop(TSHttpTxn txnp, void *ih = nullptr, time_t now = 0) override; }; diff --git a/proxy/http/remap/NextHopHealthStatus.cc b/proxy/http/remap/NextHopHealthStatus.cc new file mode 100644 index 00000000000..4ae65800fa9 --- /dev/null +++ b/proxy/http/remap/NextHopHealthStatus.cc @@ -0,0 +1,152 @@ +/** @file + + Implementation of nexthop consistent hash selections strategies. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "NextHopSelectionStrategy.h" +#include "HttpSM.h" + +/** + * initialize the host_map + */ +void +NextHopHealthStatus::insert(std::vector> &hosts) +{ + for (uint32_t ii = 0; ii < hosts.size(); ii++) { + std::shared_ptr h = hosts[ii]; + for (auto protocol = h->protocols.begin(); protocol != h->protocols.end(); ++protocol) { + const std::string host_port = h->getHostPort((*protocol)->port); + host_map.emplace(std::make_pair(host_port, h)); + NH_Debug(NH_DEBUG_TAG, "inserting %s into host_map", host_port.c_str()); + } + } +} + +/** + * check that hostname is available for use. + */ +bool +NextHopHealthStatus::isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih) +{ + HttpSM *sm = reinterpret_cast(txn); + int64_t sm_id = sm->sm_id; + + const std::string host_port = HostRecord::makeHostPort(hostname, port); + auto iter = host_map.find(host_port); + + if (iter == host_map.end()) { + NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] no host named %s found in host_map", sm_id, host_port.c_str()); + return false; + } + + std::shared_ptr p = iter->second; + return p->available; +} + +/** + * mark up or down the indicated host + */ +void +NextHopHealthStatus::markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih, + const time_t now) +{ + time_t _now; + now == 0 ? _now = time(nullptr) : _now = now; + + HttpSM *sm = reinterpret_cast(txn); + ParentResult result = sm->t_state.parent_result; + int64_t sm_id = sm->sm_id; + int64_t fail_threshold = sm->t_state.txn_conf->parent_fail_threshold; + int64_t retry_time = sm->t_state.txn_conf->parent_retry_time; + uint32_t new_fail_count = 0; + + // make sure we're called back with a result structure for a parent + // that is being retried. + if (status == NH_MARK_UP) { + ink_assert(result.retry == true); + } + if (result.result != PARENT_SPECIFIED) { + return; + } + + // No failover exists when the result is set through the API. + if (result.is_api_result()) { + return; + } + + const std::string host_port = HostRecord::makeHostPort(hostname, port); + auto iter = host_map.find(host_port); + if (iter == host_map.end()) { + NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] no host named %s found in host_map", sm_id, host_port.c_str()); + return; + } + + std::shared_ptr h = iter->second; + + switch (status) { + // Mark the host up. + case NH_MARK_UP: + if (!h->available) { + h->set_available(); + NH_Note("[%" PRId64 "] http parent proxy %s restored", sm_id, hostname); + } + break; + // Mark the host down. + case NH_MARK_DOWN: + if (h->failedAt == 0 || result.retry == true) { + { // lock guard + std::lock_guard guard(h->_mutex); + if (h->failedAt == 0) { + h->failedAt = _now; + if (result.retry == false) { + new_fail_count = h->failCount = 1; + } + } else if (result.retry == true) { + h->failedAt = _now; + } + } // end lock guard + NH_Note("[%" PRId64 "] NextHop %s marked as down %s", sm_id, (result.retry) ? "retry" : "initially", h->hostname.c_str()); + } else { + int old_count = 0; + // if the last failure was outside the retry window, set the failcount to 1 and failedAt to now. + { // lock guard + std::lock_guard lock(h->_mutex); + if ((h->failedAt + retry_time) < static_cast(_now)) { + h->failCount = 1; + h->failedAt = _now; + } else { + old_count = h->failCount = 1; + } + new_fail_count = old_count + 1; + } // end of lock_guard + NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] Parent fail count increased to %d for %s", sm_id, new_fail_count, h->hostname.c_str()); + } + + if (new_fail_count >= fail_threshold) { + h->set_unavailable(); + NH_Note("[%" PRId64 "] Failure threshold met failcount:%d >= threshold:%" PRId64 ", http parent proxy %s marked down", sm_id, + new_fail_count, fail_threshold, h->hostname.c_str()); + NH_Debug(NH_DEBUG_TAG, "[%" PRId64 "] NextHop %s marked unavailable, h->available=%s", sm_id, h->hostname.c_str(), + (h->available) ? "true" : "false"); + } + break; + } +} diff --git a/proxy/http/remap/NextHopRoundRobin.cc b/proxy/http/remap/NextHopRoundRobin.cc index 8bdcbc59205..0bbcc298726 100644 --- a/proxy/http/remap/NextHopRoundRobin.cc +++ b/proxy/http/remap/NextHopRoundRobin.cc @@ -24,6 +24,7 @@ #include #include +#include "HttpSM.h" #include "NextHopRoundRobin.h" NextHopRoundRobin::~NextHopRoundRobin() @@ -32,14 +33,19 @@ NextHopRoundRobin::~NextHopRoundRobin() } void -NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, RequestData &rdata, const uint64_t fail_threshold, - const uint64_t retry_time, time_t now) +NextHopRoundRobin::findNextHop(TSHttpTxn txnp, void *ih, time_t now) { - time_t _now = now; - bool firstcall = true; - bool parentUp = false; - bool parentRetry = false; - bool wrapped = result.wrap_around; + HttpSM *sm = reinterpret_cast(txnp); + ParentResult *result = &sm->t_state.parent_result; + HttpRequestData request_info = sm->t_state.request_data; + int64_t sm_id = sm->sm_id; + int64_t fail_threshold = sm->t_state.txn_conf->parent_fail_threshold; + int64_t retry_time = sm->t_state.txn_conf->parent_retry_time; + time_t _now = now; + bool firstcall = true; + bool parentUp = false; + bool parentRetry = false; + bool wrapped = result->wrap_around; std::vector wrap_around(groups, false); uint32_t cur_hst_index = 0; uint32_t cur_grp_index = 0; @@ -47,41 +53,40 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque uint32_t start_group = 0; uint32_t start_host = 0; std::shared_ptr cur_host; - HostStatus &pStatus = HostStatus::instance(); - HttpRequestData *request_info = static_cast(&rdata); - HostStatus_t host_stat = HostStatus_t::HOST_STATUS_UP; + HostStatus &pStatus = HostStatus::instance(); + HostStatus_t host_stat = HostStatus_t::HOST_STATUS_UP; - if (result.line_number != -1 && result.result != PARENT_UNDEFINED) { + if (result->line_number != -1 && result->result != PARENT_UNDEFINED) { firstcall = false; } if (firstcall) { // distance is the index into the strategies map, this is the equivalent to the old line_number in parent.config. - result.line_number = distance; + result->line_number = distance; NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] first call , cur_grp_index: %d, cur_hst_index: %d, distance: %d", sm_id, cur_grp_index, cur_hst_index, distance); switch (policy_type) { case NH_FIRST_LIVE: - result.start_parent = cur_hst_index = 0; - cur_grp_index = 0; + result->start_parent = cur_hst_index = 0; + cur_grp_index = 0; break; case NH_RR_STRICT: { std::lock_guard lock(_mutex); - cur_hst_index = result.start_parent = this->hst_index; - cur_grp_index = 0; - this->hst_index = (this->hst_index + 1) % hst_size; + cur_hst_index = result->start_parent = this->hst_index; + cur_grp_index = 0; + this->hst_index = (this->hst_index + 1) % hst_size; } break; case NH_RR_IP: cur_grp_index = 0; - if (rdata.get_client_ip() != nullptr) { - cur_hst_index = result.start_parent = ntohl(ats_ip_hash(rdata.get_client_ip())) % hst_size; + if (request_info.get_client_ip() != nullptr) { + cur_hst_index = result->start_parent = ntohl(ats_ip_hash(request_info.get_client_ip())) % hst_size; } else { cur_hst_index = this->hst_index; } break; case NH_RR_LATCHED: cur_grp_index = 0; - cur_hst_index = result.start_parent = latched_index; + cur_hst_index = result->start_parent = latched_index; break; default: ink_assert(0); @@ -93,20 +98,20 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next call, cur_grp_index: %d, cur_hst_index: %d, distance: %d", sm_id, cur_grp_index, cur_hst_index, distance); // Move to next parent due to failure - latched_index = cur_hst_index = (result.last_parent + 1) % hst_size; + latched_index = cur_hst_index = (result->last_parent + 1) % hst_size; cur_host = host_groups[cur_grp_index][cur_hst_index]; // Check to see if we have wrapped around - if (static_cast(cur_hst_index) == result.start_parent) { + if (static_cast(cur_hst_index) == result->start_parent) { // We've wrapped around so bypass if we can if (go_direct == true) { - result.result = PARENT_DIRECT; + result->result = PARENT_DIRECT; } else { - result.result = PARENT_FAIL; + result->result = PARENT_FAIL; } - result.hostname = nullptr; - result.port = 0; - result.wrap_around = true; + result->hostname = nullptr; + result->port = 0; + result->wrap_around = true; return; } } @@ -130,7 +135,7 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque "[%" PRIu64 "] Selected a parent, %s, failCount (faileAt: %d failCount: %d), FailThreshold: %" PRIu64 ", request_info->xact_start: %ld", sm_id, cur_host->hostname.c_str(), (unsigned)cur_host->failedAt, cur_host->failCount, fail_threshold, - request_info->xact_start); + request_info.xact_start); // check if 'cur_host' is available, mark it up if it is. if ((cur_host->failedAt == 0) || (cur_host->failCount < fail_threshold)) { if (host_stat == HOST_STATUS_UP) { @@ -143,7 +148,7 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque } else { // if not available, check to see if it can be retried. If so, set the retry flag and temporairly mark it as // available. _now == 0 ? _now = time(nullptr) : _now = now; - if (((result.wrap_around) || (cur_host->failedAt + retry_time) < static_cast(_now)) && + if (((result->wrap_around) || (cur_host->failedAt + retry_time) < static_cast(_now)) && host_stat == HOST_STATUS_UP) { // Reuse the parent parentUp = true; @@ -160,15 +165,15 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque // The selected host is available or retryable, return the search result. if (parentUp == true && host_stat != HOST_STATUS_DOWN) { NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] status for %s: %s", sm_id, cur_host->hostname.c_str(), HostStatusNames[host_stat]); - result.result = PARENT_SPECIFIED; - result.hostname = cur_host->hostname.c_str(); - result.port = cur_host->getPort(scheme); - result.last_parent = cur_hst_index; - result.last_group = cur_grp_index; - result.retry = parentRetry; - ink_assert(result.hostname != nullptr); - ink_assert(result.port != 0); - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] Chosen parent = %s.%d", sm_id, result.hostname, result.port); + result->result = PARENT_SPECIFIED; + result->hostname = cur_host->hostname.c_str(); + result->port = cur_host->getPort(scheme); + result->last_parent = cur_hst_index; + result->last_group = cur_grp_index; + result->retry = parentRetry; + ink_assert(result->hostname != nullptr); + ink_assert(result->port != 0); + NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] Chosen parent = %s.%d", sm_id, result->hostname, result->port); return; } @@ -176,7 +181,7 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque if (groups == 1) { latched_index = cur_hst_index = (cur_hst_index + 1) % hst_size; if (start_host == cur_hst_index) { - wrap_around[cur_grp_index] = wrapped = result.wrap_around = true; + wrap_around[cur_grp_index] = wrapped = result->wrap_around = true; } } else { // search the fail over groups. if (ring_mode == NH_ALTERNATE_RING) { // use alternating ring mode. @@ -185,7 +190,7 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque if (cur_grp_index == start_group) { latched_index = cur_hst_index = (cur_hst_index + 1) % hst_size; if (cur_hst_index == start_host) { - wrapped = wrap_around[cur_grp_index] = result.wrap_around = true; + wrapped = wrap_around[cur_grp_index] = result->wrap_around = true; } } } else { // use the exhaust ring mode. @@ -194,7 +199,7 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque wrap_around[cur_grp_index] = true; cur_grp_index = (cur_grp_index + 1) % groups; if (cur_grp_index == start_group) { - wrapped = wrap_around[cur_grp_index] = result.wrap_around = true; + wrapped = wrap_around[cur_grp_index] = result->wrap_around = true; } else { start_host = cur_hst_index = 0; } @@ -209,11 +214,11 @@ NextHopRoundRobin::findNextHop(const uint64_t sm_id, ParentResult &result, Reque } while (!wrapped); if (go_direct == true) { - result.result = PARENT_DIRECT; + result->result = PARENT_DIRECT; } else { - result.result = PARENT_FAIL; + result->result = PARENT_FAIL; } - result.hostname = nullptr; - result.port = 0; + result->hostname = nullptr; + result->port = 0; } diff --git a/proxy/http/remap/NextHopRoundRobin.h b/proxy/http/remap/NextHopRoundRobin.h index 80b05e01af5..a7cb8233153 100644 --- a/proxy/http/remap/NextHopRoundRobin.h +++ b/proxy/http/remap/NextHopRoundRobin.h @@ -40,6 +40,5 @@ class NextHopRoundRobin : public NextHopSelectionStrategy { return NextHopSelectionStrategy::Init(n); } - void findNextHop(const uint64_t sm_id, ParentResult &result, RequestData &rdata, const uint64_t fail_threshold, - const uint64_t retry_time, time_t now = 0) override; + void findNextHop(TSHttpTxn txnp, void *ih = nullptr, time_t now = 0) override; }; diff --git a/proxy/http/remap/NextHopSelectionStrategy.cc b/proxy/http/remap/NextHopSelectionStrategy.cc index 0507350c97e..0b0f76638fa 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.cc +++ b/proxy/http/remap/NextHopSelectionStrategy.cc @@ -23,6 +23,7 @@ #include #include "I_Machine.h" +#include "HttpSM.h" #include "NextHopSelectionStrategy.h" // ring mode strings @@ -179,6 +180,7 @@ NextHopSelectionStrategy::Init(const YAML::Node &n) hosts_inner.push_back(std::move(host_rec)); num_parents++; } + passive_health.insert(hosts_inner); host_groups.push_back(std::move(hosts_inner)); } } @@ -193,105 +195,18 @@ NextHopSelectionStrategy::Init(const YAML::Node &n) } void -NextHopSelectionStrategy::markNextHopDown(const uint64_t sm_id, ParentResult &result, const uint64_t fail_threshold, - const uint64_t retry_time, time_t now) +NextHopSelectionStrategy::markNextHop(TSHttpTxn txnp, const char *hostname, const int port, const NHCmd status, void *ih, + const time_t now) { - time_t _now; - now == 0 ? _now = time(nullptr) : _now = now; - uint32_t new_fail_count = 0; - - // Make sure that we are being called back with with a - // result structure with a selected parent. - if (result.result != PARENT_SPECIFIED) { - return; - } - // If we were set through the API we currently have not failover - // so just return fail - if (result.is_api_result()) { - ink_assert(0); - return; - } - uint32_t hst_size = host_groups[result.last_group].size(); - ink_assert(result.last_parent < hst_size); - std::shared_ptr h = host_groups[result.last_group][result.last_parent]; - - // If the parent has already been marked down, just increment - // the failure count. If this is the first mark down on a - // parent we need to both set the failure time and set - // count to one. If this was the result of a retry, we - // must update move the failedAt timestamp to now so that we - // continue negative cache the parent - if (h->failedAt == 0 || result.retry == true) { - { // start of lock_guard scope. - std::lock_guard lock(h->_mutex); - if (h->failedAt == 0) { - // Mark the parent failure time. - h->failedAt = _now; - if (result.retry == false) { - new_fail_count = h->failCount = 1; - } - } else if (result.retry == true) { - h->failedAt = _now; - } - } // end of lock_guard scope - NH_Note("[%" PRIu64 "] NextHop %s marked as down %s:%d", sm_id, (result.retry) ? "retry" : "initially", h->hostname.c_str(), - h->getPort(scheme)); - - } else { - int old_count = 0; - - // if the last failure was outside the retry window, set the failcount to 1 and failedAt to now. - { // start of lock_guard_scope - std::lock_guard lock(h->_mutex); - if ((h->failedAt + retry_time) < static_cast(_now)) { - h->failCount = 1; - h->failedAt = _now; - } else { - old_count = h->failCount = 1; - } - new_fail_count = old_count + 1; - } // end of lock_guard - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] Parent fail count increased to %d for %s:%d", sm_id, new_fail_count, h->hostname.c_str(), - h->getPort(scheme)); - } - - if (new_fail_count >= fail_threshold) { - h->set_unavailable(); - NH_Note("[%" PRIu64 "] Failure threshold met failcount:%d >= threshold:%" PRIu64 ", http parent proxy %s:%d marked down", sm_id, - new_fail_count, fail_threshold, h->hostname.c_str(), h->getPort(scheme)); - NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop %s:%d marked unavailable, h->available=%s", sm_id, h->hostname.c_str(), - h->getPort(scheme), (h->available) ? "true" : "false"); - } -} - -void -NextHopSelectionStrategy::markNextHopUp(const uint64_t sm_id, ParentResult &result) -{ - // Make sure that we are being called back with with a - // result structure with a parent that is being retried - ink_assert(result.retry == true); - if (result.result != PARENT_SPECIFIED) { - return; - } - // If we were set through the API we currently have not failover - // so just return fail - if (result.is_api_result()) { - ink_assert(0); - return; - } - uint32_t hst_size = host_groups[result.last_group].size(); - ink_assert(result.last_parent < hst_size); - std::shared_ptr h = host_groups[result.last_group][result.last_parent]; - - if (!h->available) { - h->set_available(); - NH_Note("[%" PRIu64 "] http parent proxy %s:%d restored", sm_id, h->hostname.c_str(), h->getPort(scheme)); - } + return passive_health.markNextHop(txnp, hostname, port, status, ih, now); } bool -NextHopSelectionStrategy::nextHopExists(const uint64_t sm_id) +NextHopSelectionStrategy::nextHopExists(TSHttpTxn txnp, void *ih) { + HttpSM *sm = reinterpret_cast(txnp); + int64_t sm_id = sm->sm_id; + for (uint32_t gg = 0; gg < groups; gg++) { for (uint32_t hh = 0; hh < host_groups[gg].size(); hh++) { HostRecord *p = host_groups[gg][hh].get(); @@ -304,6 +219,19 @@ NextHopSelectionStrategy::nextHopExists(const uint64_t sm_id) return false; } +bool +NextHopSelectionStrategy::responseIsRetryable(unsigned int current_retry_attempts, HTTPStatus response_code) +{ + return this->resp_codes.contains(response_code) && current_retry_attempts < this->max_simple_retries && + current_retry_attempts < this->num_parents; +} + +bool +NextHopSelectionStrategy::onFailureMarkParentDown(HTTPStatus response_code) +{ + return static_cast(response_code) >= 500 && static_cast(response_code) <= 599; +} + namespace YAML { template <> struct convert { diff --git a/proxy/http/remap/NextHopSelectionStrategy.h b/proxy/http/remap/NextHopSelectionStrategy.h index 77a8b11cea6..6076188f00c 100644 --- a/proxy/http/remap/NextHopSelectionStrategy.h +++ b/proxy/http/remap/NextHopSelectionStrategy.h @@ -23,6 +23,7 @@ #pragma once +#include "ts/nexthop.h" #include "ParentSelection.h" #ifndef _NH_UNIT_TESTS_ @@ -122,7 +123,7 @@ struct HostRecord : ATSConsistentHashNode { failCount = o.failCount; upAt = o.upAt; weight = o.weight; - hash_string = ""; + hash_string = o.hash_string; host_index = -1; group_index = -1; available = true; @@ -170,7 +171,7 @@ struct HostRecord : ATSConsistentHashNode { } int - getPort(NHSchemeType scheme) + getPort(NHSchemeType scheme) const { int port = 0; for (uint32_t i = 0; i < protocols.size(); i++) { @@ -181,6 +182,31 @@ struct HostRecord : ATSConsistentHashNode { } return port; } + + static std::string + makeHostPort(const std::string &hostname, const int port) + { + return hostname + ":" + std::to_string(port); + } + + std::string + getHostPort(const int port) const + { + return makeHostPort(this->hostname, port); + } +}; + +class NextHopHealthStatus : public NHHealthStatus +{ +public: + void insert(std::vector> &hosts); + bool isNextHopAvailable(TSHttpTxn txn, const char *hostname, const int port, void *ih = nullptr) override; + void markNextHop(TSHttpTxn txn, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, + const time_t now = 0) override; + NextHopHealthStatus(){}; + +private: + std::unordered_map> host_map; }; class NextHopSelectionStrategy @@ -190,12 +216,13 @@ class NextHopSelectionStrategy NextHopSelectionStrategy(const std::string_view &name, const NHPolicyType &type); virtual ~NextHopSelectionStrategy(){}; bool Init(const YAML::Node &n); - virtual void findNextHop(const uint64_t sm_id, ParentResult &result, RequestData &rdata, const uint64_t fail_threshold, - const uint64_t retry_time, time_t now = 0) = 0; - void markNextHopDown(const uint64_t sm_id, ParentResult &result, const uint64_t fail_threshold, const uint64_t retry_time, - time_t now = 0); - void markNextHopUp(const uint64_t sm_id, ParentResult &result); - bool nextHopExists(const uint64_t sm_id); + virtual void findNextHop(TSHttpTxn txnp, void *ih = nullptr, time_t now = 0) = 0; + void markNextHop(TSHttpTxn txnp, const char *hostname, const int port, const NHCmd status, void *ih = nullptr, + const time_t now = 0); + bool nextHopExists(TSHttpTxn txnp, void *ih = nullptr); + + virtual bool responseIsRetryable(unsigned int current_retry_attempts, HTTPStatus response_code); + virtual bool onFailureMarkParentDown(HTTPStatus response_code); std::string strategy_name; bool go_direct = true; @@ -206,6 +233,7 @@ class NextHopSelectionStrategy NHRingMode ring_mode = NH_ALTERNATE_RING; ResponseCodes resp_codes; HealthChecks health_checks; + NextHopHealthStatus passive_health; std::vector>> host_groups; uint32_t max_simple_retries = 1; uint32_t groups = 0; diff --git a/proxy/http/remap/unit-tests/consistent-hash-tests.yaml b/proxy/http/remap/unit-tests/consistent-hash-tests.yaml index 5bb1d9bfeea..4ff3449a55e 100644 --- a/proxy/http/remap/unit-tests/consistent-hash-tests.yaml +++ b/proxy/http/remap/unit-tests/consistent-hash-tests.yaml @@ -169,3 +169,198 @@ strategies: health_check: - passive - active + - strategy: "ignore-self-detect-false" + policy: consistent_hash + ignore_self_detect: false + hash_key: path + groups: + - &g1 + - host: localhost + protocol: + - scheme: http + port: 8000 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8001 + weight: 1.0 + - &g2 + - host: localhost + protocol: + - scheme: http + port: 8002 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8003 + weight: 1.0 + scheme: http + failover: + ring_mode: exhaust_ring + response_codes: + - 404 + - 503 + health_check: + - passive + - active + - strategy: "ignore-self-detect-true" + policy: consistent_hash + ignore_self_detect: true + hash_key: path + groups: + - &g1 + - host: localhost + protocol: + - scheme: http + port: 8000 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8001 + weight: 1.0 + - &g2 + - host: localhost + protocol: + - scheme: http + port: 8002 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8003 + weight: 1.0 + scheme: http + failover: + ring_mode: exhaust_ring + response_codes: + - 404 + - 503 + health_check: + - passive + - active + - strategy: "ignore-self-detect-true" + policy: consistent_hash + ignore_self_detect: true + hash_key: path + groups: + - &g1 + - host: localhost + protocol: + - scheme: http + port: 8000 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8001 + weight: 1.0 + - &g2 + - host: localhost + protocol: + - scheme: http + port: 8002 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8003 + weight: 1.0 + scheme: http + failover: + ring_mode: exhaust_ring + response_codes: + - 404 + - 503 + health_check: + - passive + - active + - strategy: "same-host-different-port" + policy: consistent_hash + ignore_self_detect: true + hash_key: path + groups: + - &g1 + - host: localhost + protocol: + - scheme: http + port: 8000 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8001 + weight: 1.0 + - &g2 + - host: localhost + protocol: + - scheme: http + port: 8002 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8003 + weight: 1.0 + - &g3 + - host: localhost + protocol: + - scheme: http + port: 8004 + weight: 1.0 + - host: localhost + protocol: + - scheme: http + port: 8005 + weight: 1.0 + scheme: http + failover: + ring_mode: exhaust_ring + response_codes: + - 404 + - 503 + health_check: + - passive + - active + - strategy: "hash-string-override" + policy: consistent_hash + ignore_self_detect: true + hash_key: path + groups: + - &hso0 + - host: foo.test + hash_string: one + protocol: + - scheme: http + port: 80 + weight: 1.0 + - host: bar.test + hash_string: two + protocol: + - scheme: http + port: 80 + weight: 1.0 + - &hso1 + - host: baz.test + hash_string: three + protocol: + - scheme: http + port: 80 + weight: 1.0 + - host: bat.test + hash_string: four + protocol: + - scheme: http + port: 80 + weight: 1.0 + scheme: http + failover: + ring_mode: exhaust_ring + response_codes: + - 404 + - 503 + health_check: + - passive + - active diff --git a/proxy/http/remap/unit-tests/nexthop_test_stubs.cc b/proxy/http/remap/unit-tests/nexthop_test_stubs.cc index 91a168568f2..ab019617bbb 100644 --- a/proxy/http/remap/unit-tests/nexthop_test_stubs.cc +++ b/proxy/http/remap/unit-tests/nexthop_test_stubs.cc @@ -27,38 +27,107 @@ */ +#include "HttpSM.h" #include "nexthop_test_stubs.h" -#include "HttpTransact.h" +HttpSM::HttpSM() : Continuation(nullptr), vc_table(this) {} +void +HttpSM::cleanup() +{ +} +void +HttpSM::destroy() +{ +} +void +HttpSM::handle_api_return() +{ +} +void +HttpSM::set_next_state() +{ +} +int +HttpSM::kill_this_async_hook(int event, void *data) +{ + return 0; +} + +HttpVCTable::HttpVCTable(HttpSM *smp) +{ + sm = smp; +} +HttpCacheAction::HttpCacheAction() {} +void +HttpCacheAction::cancel(Continuation *c) +{ +} +PostDataBuffers::~PostDataBuffers() {} +void +APIHooks::clear() +{ +} + +HttpTunnel::HttpTunnel() {} +HttpCacheSM::HttpCacheSM() {} +HttpHookState::HttpHookState() {} +HttpTunnelConsumer::HttpTunnelConsumer() {} +HttpTunnelProducer::HttpTunnelProducer() {} +ChunkedHandler::ChunkedHandler() {} +alignas(OverridableHttpConfigParams) char _my_txn_conf[sizeof(OverridableHttpConfigParams)]; + +// this is done to cleanup and avoid memory leaks in the unit tests. +static HdrHeap *myHeap = nullptr; void -br_destroy(HttpRequestData &h) +br_destroy(HttpSM &sm) { - delete h.hdr; - delete h.api_info; - ats_free(h.hostname_str); + HttpRequestData *h = &sm.t_state.request_data; + if (myHeap != nullptr) { + myHeap->destroy(); + myHeap = nullptr; + } + delete h->hdr; + delete h->api_info; + ats_free(h->hostname_str); } void -build_request(HttpRequestData &h, const char *os_hostname) +build_request(int64_t sm_id, HttpSM *sm, sockaddr_in *ip, const char *os_hostname, sockaddr const *dest_ip) { - HdrHeap *heap = nullptr; + sm->sm_id = sm_id; - if (h.hdr == nullptr) { - h.hdr = new HTTPHdr(); - h.hdr->create(HTTP_TYPE_REQUEST, heap); - h.xact_start = time(nullptr); - h.incoming_port = 80; - ink_zero(h.src_ip); - ink_zero(h.dest_ip); + if (myHeap == nullptr) { + myHeap = new_HdrHeap(HdrHeap::DEFAULT_SIZE + 64); + } + if (sm->t_state.request_data.hdr != nullptr) { + delete sm->t_state.request_data.hdr; } - if (h.hostname_str != nullptr) { - ats_free(h.hostname_str); - h.hostname_str = ats_strdup(os_hostname); + sm->t_state.request_data.hdr = new HTTPHdr(); + sm->t_state.request_data.hdr->create(HTTP_TYPE_REQUEST, myHeap); + if (sm->t_state.request_data.hostname_str != nullptr) { + ats_free(sm->t_state.request_data.hostname_str); } - if (h.api_info == nullptr) { - h.api_info = new HttpApiInfo(); + sm->t_state.request_data.hostname_str = ats_strdup(os_hostname); + sm->t_state.request_data.xact_start = time(nullptr); + ink_zero(sm->t_state.request_data.src_ip); + ink_zero(sm->t_state.request_data.dest_ip); + ats_ip_copy(&sm->t_state.request_data.dest_ip.sa, dest_ip); + sm->t_state.request_data.incoming_port = 80; + if (sm->t_state.request_data.api_info != nullptr) { + delete sm->t_state.request_data.api_info; } + sm->t_state.request_data.api_info = new HttpApiInfo(); + if (ip != nullptr) { + memcpy(&sm->t_state.request_data.src_ip.sa, ip, sizeof(sm->t_state.request_data.src_ip.sa)); + } + sm->t_state.request_data.xact_start = time(0); + + memset(_my_txn_conf, 0, sizeof(_my_txn_conf)); + OverridableHttpConfigParams *oride = reinterpret_cast(_my_txn_conf); + oride->parent_retry_time = 1; + oride->parent_fail_threshold = 1; + sm->t_state.txn_conf = reinterpret_cast(_my_txn_conf); } void @@ -89,7 +158,7 @@ HttpRequestData::get_ip() sockaddr const * HttpRequestData::get_client_ip() { - return nullptr; + return &src_ip.sa; } #include "InkAPIInternal.h" @@ -125,18 +194,35 @@ Machine::is_self(const char *name) HostStatRec::HostStatRec(){}; HostStatus::HostStatus() {} -HostStatus::~HostStatus(){}; + +HostStatus::~HostStatus() +{ + for (auto i = this->hosts_statuses.begin(); i != this->hosts_statuses.end(); ++i) { + delete i->second; + } +} + HostStatRec * HostStatus::getHostStatus(const char *name) { - // for unit tests only, always return a record with HOST_STATUS_UP - static HostStatRec rec; - rec.status = HostStatus_t::HOST_STATUS_UP; - return &rec; + if (this->hosts_statuses[name] == nullptr) { + // for unit tests only, always return a record with HOST_STATUS_UP, if it wasn't set with setHostStatus + static HostStatRec rec; + rec.status = HostStatus_t::HOST_STATUS_UP; + return &rec; + } + return this->hosts_statuses[name]; } + void -HostStatus::setHostStatus(char const *host, HostStatus_t status, unsigned int, unsigned int) +HostStatus::setHostStatus(char const *host, HostStatus_t status, unsigned int down_time, unsigned int reason) { + if (this->hosts_statuses[host] == nullptr) { + this->hosts_statuses[host] = new (HostStatRec); + } + this->hosts_statuses[host]->status = status; + this->hosts_statuses[host]->reasons = reason; + this->hosts_statuses[host]->local_down_time = down_time; NH_Debug("next_hop", "setting host status for '%s' to %s", host, HostStatusNames[status]); } diff --git a/proxy/http/remap/unit-tests/nexthop_test_stubs.h b/proxy/http/remap/unit-tests/nexthop_test_stubs.h index bc05c5075b7..292ed06eb92 100644 --- a/proxy/http/remap/unit-tests/nexthop_test_stubs.h +++ b/proxy/http/remap/unit-tests/nexthop_test_stubs.h @@ -48,7 +48,6 @@ extern "C" { class HttpRequestData; void PrintToStdErr(const char *fmt, ...); -void br_destroy(HttpRequestData &h); void build_request(HttpRequestData &h, const char *os_hostname); #ifdef __cplusplus @@ -56,34 +55,23 @@ void build_request(HttpRequestData &h, const char *os_hostname); #endif /* __cplusplus */ #include "ControlMatcher.h" -struct TestData : public HttpRequestData { - std::string hostname; - sockaddr client_ip; - sockaddr server_ip; - - TestData() - { - client_ip.sa_family = AF_INET; - memset(client_ip.sa_data, 0, sizeof(client_ip.sa_data)); - } - const char * - get_host() - { - return hostname.c_str(); - } - sockaddr const * - get_ip() - { - return &server_ip; - } - sockaddr const * - get_client_ip() - { - return &client_ip; - } - char * - get_string() - { - return nullptr; - } +#include "ParentSelection.h" + +struct trans_config { + int64_t parent_retry_time = 0; + int64_t parent_fail_threshold = 0; + trans_config() {} +}; + +struct trans_state { + ParentResult parent_result; + HttpRequestData request_data; + trans_config txn_conf; + trans_state() {} }; + +class HttpSM; + +void br_destroy(HttpSM &sm); + +void build_request(int64_t sm_id, HttpSM *sm, sockaddr_in *ip, const char *os_hostname, sockaddr const *dest_ip); diff --git a/proxy/http/remap/unit-tests/test_NextHopConsistentHash.cc b/proxy/http/remap/unit-tests/test_NextHopConsistentHash.cc index f831253064e..47660d6cbee 100644 --- a/proxy/http/remap/unit-tests/test_NextHopConsistentHash.cc +++ b/proxy/http/remap/unit-tests/test_NextHopConsistentHash.cc @@ -31,6 +31,7 @@ #include /* catch unit-test framework */ #include +#include "HttpSM.h" #include "nexthop_test_stubs.h" #include "NextHopSelectionStrategy.h" #include "NextHopStrategyFactory.h" @@ -66,18 +67,11 @@ SCENARIO("Testing NextHopConsistentHash class, using policy 'consistent_hash'", WHEN("requests are received.") { - HttpRequestData request; - ParentResult result; - TestData rdata; - rdata.xact_start = time(nullptr); - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - // need to run these checks in succession so there // are no host status state changes. // // These tests simulate failed requests using a selected host. - // markNextHopDown() is called by the state machine when + // markNextHop() is called by the state machine when // there is a request failure due to a connection error or // timeout. the 'result' struct has the information on the // host used in the failed request and when called, marks the @@ -88,95 +82,100 @@ SCENARIO("Testing NextHopConsistentHash class, using policy 'consistent_hash'", // THEN("when making requests and taking nodes down.") { + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); + REQUIRE(nhf.strategies_loaded == true); REQUIRE(strategy != nullptr); // first request. - build_request(request, "rabbit.net"); - result.reset(); - strategy->findNextHop(10001, result, request, fail_threshold, retry_time); + build_request(10001, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); - // mark down p1.foo.com. markNextHopDown looks at the 'result' + // mark down p1.foo.com. markNextHop looks at the 'result' // and uses the host index there mark down the host selected // from a - strategy->markNextHopDown(10001, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // second request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10002, result, request, fail_threshold, retry_time); + build_request(10002, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // mark down p2.foo.com - strategy->markNextHopDown(10002, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // third request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10003, result, request, fail_threshold, retry_time); + build_request(10003, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "s2.bar.com") == 0); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "s2.bar.com") == 0); // mark down s2.bar.com - strategy->markNextHopDown(10003, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fourth request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10004, result, request, fail_threshold, retry_time); + build_request(10004, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "s1.bar.com") == 0); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "s1.bar.com") == 0); // mark down s1.bar.com. - strategy->markNextHopDown(10004, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fifth request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10005, result, request, fail_threshold, retry_time); + build_request(10005, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "q1.bar.com") == 0); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "q1.bar.com") == 0); // mark down q1.bar.com - strategy->markNextHopDown(10005, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // sixth request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10006, result, request, fail_threshold, retry_time); + build_request(10006, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "q2.bar.com") == 0); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "q2.bar.com") == 0); // mark down q2.bar.com - strategy->markNextHopDown(10006, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // seventh request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10007, result, request, fail_threshold, retry_time); + build_request(10007, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); - CHECK(result.result == ParentResultType::PARENT_DIRECT); - CHECK(result.hostname == nullptr); + CHECK(result->result == ParentResultType::PARENT_DIRECT); + CHECK(result->hostname == nullptr); // sleep and test that q2 is becomes retryable; time_t now = time(nullptr) + 5; // eighth request - reusing the ParentResult from the last request // simulating a failure triggers a search for another parent, not firstcall. - build_request(request, "rabbit.net"); - strategy->findNextHop(10008, result, request, fail_threshold, retry_time, now); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "q2.bar.com") == 0); + build_request(10008, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp, nullptr, now); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "q2.bar.com") == 0); + + // free up request resources. + br_destroy(sm); } - // free up request resources. - br_destroy(request); } } } @@ -213,12 +212,9 @@ SCENARIO("Testing NextHopConsistentHash class (all firstcalls), using policy 'co // state changes induced by using multiple WHEN() and THEN() WHEN("initial requests are made and hosts are unavailable .") { - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - TestData rdata; - rdata.xact_start = time(nullptr); - HttpRequestData request; - ParentResult result; + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); THEN("when making requests and taking nodes down.") { @@ -226,61 +222,289 @@ SCENARIO("Testing NextHopConsistentHash class (all firstcalls), using policy 'co REQUIRE(strategy != nullptr); // first request. - build_request(request, "rabbit.net"); - result.reset(); - strategy->findNextHop(20001, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(20001, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); // mark down p1.foo.com - strategy->markNextHopDown(20001, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // second request - build_request(request, "rabbit.net"); - result.reset(); - strategy->findNextHop(20002, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + build_request(20002, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // mark down p2.foo.com - strategy->markNextHopDown(20002, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // third request - build_request(request, "rabbit.net"); - result.reset(); - strategy->findNextHop(20003, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "s2.bar.com") == 0); + result->reset(); + build_request(20003, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "s2.bar.com") == 0); // mark down s2.bar.com - strategy->markNextHopDown(20003, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fourth request - build_request(request, "rabbit.net"); - result.reset(); - strategy->findNextHop(20004, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "s1.bar.com") == 0); + result->reset(); + build_request(20004, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "s1.bar.com") == 0); // mark down s1.bar.com - strategy->markNextHopDown(20004, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fifth request - build_request(request, "rabbit.net/asset1"); - result.reset(); - strategy->findNextHop(20005, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "q1.bar.com") == 0); + result->reset(); + build_request(20005, &sm, nullptr, "rabbit.net/asset1", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "q1.bar.com") == 0); // sixth request - wait and p1 should now become available time_t now = time(nullptr) + 5; - build_request(request, "rabbit.net"); - result.reset(); - strategy->findNextHop(20006, result, request, fail_threshold, retry_time, now); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + result->reset(); + build_request(20006, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp, nullptr, now); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); } // free up request resources. - br_destroy(request); + br_destroy(sm); + } + } +} + +SCENARIO("Testing NextHop ignore_self_detect false", "[NextHopConsistentHash]") +{ + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + + GIVEN("Loading the consistent-hash-tests.yaml config for 'consistent_hash' tests.") + { + // load the configuration strtegies. + std::shared_ptr strategy; + NextHopStrategyFactory nhf(TS_SRC_DIR "unit-tests/consistent-hash-tests.yaml"); + strategy = nhf.strategyInstance("ignore-self-detect-false"); + + HostStatus &hs = HostStatus::instance(); + hs.setHostStatus("localhost", HostStatus_t::HOST_STATUS_DOWN, 0, Reason::SELF_DETECT); + + WHEN("the config is loaded.") + { + THEN("then testing consistent hash.") + { + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + REQUIRE(strategy->groups == 2); + } + } + + WHEN("requests are received.") + { + THEN("when making requests to localhost.") + { + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); + + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + + build_request(10001, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_DIRECT); + CHECK(result->hostname == nullptr); + br_destroy(sm); + } + } + } +} + +SCENARIO("Testing NextHop ignore_self_detect true", "[NextHopConsistentHash]") +{ + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + + GIVEN("Loading the consistent-hash-tests.yaml config for 'consistent_hash' tests.") + { + // load the configuration strtegies. + std::shared_ptr strategy; + NextHopStrategyFactory nhf(TS_SRC_DIR "unit-tests/consistent-hash-tests.yaml"); + strategy = nhf.strategyInstance("ignore-self-detect-true"); + + HostStatus &hs = HostStatus::instance(); + hs.setHostStatus("localhost", HostStatus_t::HOST_STATUS_DOWN, 0, Reason::SELF_DETECT); + + WHEN("the config is loaded.") + { + THEN("then testing consistent hash.") + { + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + REQUIRE(strategy->groups == 2); + } + } + + WHEN("requests are received.") + { + THEN("when making requests to localhost.") + { + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); + + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + build_request(10001, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "localhost") == 0); + CHECK(result->port == 8000); + br_destroy(sm); + } + } + } +} + +SCENARIO("Testing NextHopConsistentHash same host different port markdown", "[NextHopConsistentHash]") +{ + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + + GIVEN("Loading the consistent-hash-tests.yaml config for 'consistent_hash' tests.") + { + // load the configuration strtegies. + std::shared_ptr strategy; + NextHopStrategyFactory nhf(TS_SRC_DIR "unit-tests/consistent-hash-tests.yaml"); + strategy = nhf.strategyInstance("same-host-different-port"); + + WHEN("the config is loaded.") + { + THEN("then testing consistent hash.") + { + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + REQUIRE(strategy->groups == 3); + } + } + + WHEN("requests are received.") + { + THEN("when making requests and taking nodes down.") + { + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); + + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + + // first request. + build_request(10001, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "localhost") == 0); + CHECK(result->port == 8000); + + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); + + build_request(10002, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "localhost") == 0); + CHECK(result->port == 8002); + + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); + + build_request(10003, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "localhost") == 0); + CHECK(result->port == 8004); + br_destroy(sm); + } + } + } +} + +SCENARIO("Testing NextHopConsistentHash hash_string override", "[NextHopConsistentHash]") +{ + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + + GIVEN("Loading the consistent-hash-tests.yaml config for 'consistent_hash' tests.") + { + // load the configuration strtegies. + std::shared_ptr strategy; + NextHopStrategyFactory nhf(TS_SRC_DIR "unit-tests/consistent-hash-tests.yaml"); + strategy = nhf.strategyInstance("hash-string-override"); + + WHEN("the config is loaded.") + { + THEN("then testing consistent hash.") + { + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + REQUIRE(strategy->groups == 2); + } + } + + WHEN("requests are received.") + { + THEN("when making requests and taking nodes down.") + { + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); + + REQUIRE(nhf.strategies_loaded == true); + REQUIRE(strategy != nullptr); + + build_request(10001, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + + // We happen to know that 'foo.test' will be first if the hostname is the hash + // and foo.test will be first for the hash 'first' and the bar.test hash 'second'. + // So, if the hash_string override isn't getting applied, this will fail. + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "bar.test") == 0); + CHECK(result->port == 80); + + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); + + build_request(10002, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + + CHECK(strcmp(result->hostname, "foo.test") == 0); + CHECK(result->port == 80); + br_destroy(sm); + } } } } @@ -312,12 +536,9 @@ SCENARIO("Testing NextHopConsistentHash class (alternating rings), using policy // makeing requests and marking down hosts with a config set for alternating ring mode. WHEN("requests are made in a config set for alternating rings and hosts are marked down.") { - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - TestData rdata; - rdata.xact_start = time(nullptr); - HttpRequestData request; - ParentResult result; + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); THEN("expect the following results when making requests and marking hosts down.") { @@ -325,76 +546,77 @@ SCENARIO("Testing NextHopConsistentHash class (alternating rings), using policy REQUIRE(strategy != nullptr); // first request. - build_request(request, "bunny.net/asset1"); - result.reset(); - strategy->findNextHop(30001, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c2.foo.com") == 0); + result->reset(); + build_request(30001, &sm, nullptr, "bunny.net/asset1", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c2.foo.com") == 0); // simulated failure, mark c2 down and retry request - strategy->markNextHopDown(30001, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // second request - build_request(request, "bunny.net.net/asset1"); - strategy->findNextHop(30002, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c3.bar.com") == 0); + build_request(30002, &sm, nullptr, "bunny.net.net/asset1", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c3.bar.com") == 0); // mark down c3.bar.com - strategy->markNextHopDown(30002, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // third request - build_request(request, "bunny.net/asset2"); - result.reset(); - strategy->findNextHop(30003, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c6.bar.com") == 0); + build_request(30003, &sm, nullptr, "bunny.net/asset2", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c6.bar.com") == 0); // just mark it down and retry request - strategy->markNextHopDown(30003, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fourth request - build_request(request, "bunny.net/asset2"); - strategy->findNextHop(30004, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c1.foo.com") == 0); + build_request(30004, &sm, nullptr, "bunny.net/asset2", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c1.foo.com") == 0); // mark it down - strategy->markNextHopDown(30004, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fifth request - new request - build_request(request, "bunny.net/asset3"); - result.reset(); - strategy->findNextHop(30005, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c4.bar.com") == 0); + build_request(30005, &sm, nullptr, "bunny.net/asset3", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c4.bar.com") == 0); // mark it down and retry - strategy->markNextHopDown(30005, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // sixth request - build_request(request, "bunny.net/asset3"); - result.reset(); - strategy->findNextHop(30006, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c5.bar.com") == 0); + result->reset(); + build_request(30006, &sm, nullptr, "bunny.net/asset3", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c5.bar.com") == 0); // mark it down - strategy->markNextHopDown(30006, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // seventh request - new request with all hosts down and go_direct is false. - build_request(request, "bunny.net/asset4"); - result.reset(); - strategy->findNextHop(30007, result, request, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_FAIL); - CHECK(result.hostname == nullptr); + result->reset(); + build_request(30007, &sm, nullptr, "bunny.net/asset4", nullptr); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_FAIL); + CHECK(result->hostname == nullptr); // eighth request - retry after waiting for the retry window to expire. time_t now = time(nullptr) + 5; - build_request(request, "bunny.net/asset4"); - result.reset(); - strategy->findNextHop(30008, result, request, fail_threshold, retry_time, now); - CHECK(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "c2.foo.com") == 0); + result->reset(); + build_request(30008, &sm, nullptr, "bunny.net/asset4", nullptr); + strategy->findNextHop(txnp, nullptr, now); + CHECK(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "c2.foo.com") == 0); } // free up request resources. - br_destroy(request); + br_destroy(sm); } } } diff --git a/proxy/http/remap/unit-tests/test_NextHopRoundRobin.cc b/proxy/http/remap/unit-tests/test_NextHopRoundRobin.cc index 05c260cc5de..506d6470391 100644 --- a/proxy/http/remap/unit-tests/test_NextHopRoundRobin.cc +++ b/proxy/http/remap/unit-tests/test_NextHopRoundRobin.cc @@ -31,6 +31,7 @@ #include /* catch unit-test framework */ #include +#include "HttpSM.h" #include "nexthop_test_stubs.h" #include "NextHopSelectionStrategy.h" #include "NextHopStrategyFactory.h" @@ -38,6 +39,12 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-strict'", "[NextHopRoundRobin]") { + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + GIVEN("Loading the round-robin-tests.yaml config for round robin 'rr-strict' tests.") { std::shared_ptr strategy; @@ -56,9 +63,9 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-strict'", "[NextHopR WHEN("making requests using a 'rr-strict' policy.") { - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - TestData rdata; + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); THEN("then testing rr-strict.") { @@ -66,86 +73,103 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-strict'", "[NextHopR REQUIRE(strategy != nullptr); // first request. - ParentResult result; - strategy->findNextHop(10000, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(10001, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); // second request. - result.reset(); - strategy->findNextHop(10001, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + build_request(10002, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // third request. - result.reset(); - strategy->findNextHop(10002, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(10003, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); // did not reset result, kept it as last parent selected was p1.fo.com, mark it down and we should only select p2.foo.com - strategy->markNextHopDown(10003, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fourth request, p1 is down should select p2. - result.reset(); - strategy->findNextHop(10004, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + build_request(10004, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // fifth request, p1 is down should still select p2. - result.reset(); - strategy->findNextHop(10005, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + build_request(10005, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // mark down p2. - strategy->markNextHopDown(10006, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // fifth request, p1 and p2 are both down, should get s1.bar.com from failover ring. - result.reset(); - strategy->findNextHop(10007, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "s1.bar.com") == 0); + build_request(10006, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "s1.bar.com") == 0); // sixth request, p1 and p2 are still down, should get s1.bar.com from failover ring. - result.reset(); - strategy->findNextHop(10008, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "s1.bar.com") == 0); + build_request(10007, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "s1.bar.com") == 0); // mark down s1. - strategy->markNextHopDown(10009, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // seventh request, p1, p2, s1 are down, should get s2.bar.com from failover ring. - result.reset(); - strategy->findNextHop(10010, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "s2.bar.com") == 0); + build_request(10008, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "s2.bar.com") == 0); // mark down s2. - strategy->markNextHopDown(10011, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // eighth request, p1, p2, s1, s2 are down, should get PARENT_DIRECT as go_direct is true - result.reset(); - strategy->findNextHop(10012, result, rdata, fail_threshold, retry_time); - CHECK(result.result == ParentResultType::PARENT_DIRECT); + build_request(10009, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(result->result == ParentResultType::PARENT_DIRECT); // check that nextHopExists() returns false when all parents are down. - CHECK(strategy->nextHopExists(10012) == false); + CHECK(strategy->nextHopExists(txnp) == false); // change the request time to trigger a retry. time_t now = (time(nullptr) + 5); // ninth request, p1 and p2 are still down, should get p2.foo.com as it will be retried - result.reset(); - strategy->findNextHop(10013, result, rdata, fail_threshold, retry_time, now); - REQUIRE(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + build_request(10010, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp, nullptr, now); + REQUIRE(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // tenth request, p1 should now be retried. - result.reset(); - strategy->findNextHop(10014, result, rdata, fail_threshold, retry_time, now); - REQUIRE(result.result == ParentResultType::PARENT_SPECIFIED); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(10011, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp, nullptr, now); + REQUIRE(result->result == ParentResultType::PARENT_SPECIFIED); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); } + br_destroy(sm); } } } SCENARIO("Testing NextHopRoundRobin class, using policy 'first-live'", "[NextHopRoundRobin]") { + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + GIVEN("Loading the round-robin-tests.yaml config for round robin 'first-live' tests.") { std::shared_ptr strategy; @@ -164,9 +188,9 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'first-live'", "[NextHop WHEN("when using a strategy with a 'first-live' policy.") { - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - TestData rdata; + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); THEN("when making requests and marking down hosts.") { @@ -174,37 +198,47 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'first-live'", "[NextHop REQUIRE(strategy != nullptr); // first request. - ParentResult result; - strategy->findNextHop(20000, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(10012, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); // second request. - result.reset(); - strategy->findNextHop(20001, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(10013, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); // mark down p1. - strategy->markNextHopDown(20002, result, 1, fail_threshold); + strategy->markNextHop(txnp, result->hostname, result->port, NH_MARK_DOWN); // third request. - result.reset(); - strategy->findNextHop(20003, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p2.foo.com") == 0); + build_request(10014, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p2.foo.com") == 0); // change the request time to trigger a retry. time_t now = (time(nullptr) + 5); // fourth request, p1 should be marked for retry - result.reset(); - strategy->findNextHop(20004, result, rdata, fail_threshold, retry_time, now); - CHECK(strcmp(result.hostname, "p1.foo.com") == 0); + build_request(10015, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp, nullptr, now); + CHECK(strcmp(result->hostname, "p1.foo.com") == 0); } + br_destroy(sm); } } } SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-ip'", "[NextHopRoundRobin]") { + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + GIVEN("Loading the round-robin-tests.yaml config for round robin 'rr-ip' tests.") { std::shared_ptr strategy; @@ -217,6 +251,10 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-ip'", "[NextHopRound sa2.sin_port = 10001; sa2.sin_family = AF_INET; inet_pton(AF_INET, "192.168.1.2", &(sa2.sin_addr)); + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); + WHEN("the config is loaded.") { THEN("then the 'rr-strict' strategy is ready.") @@ -229,10 +267,6 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-ip'", "[NextHopRound WHEN("using the 'rr-strict' strategy.") { - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - TestData rdata; - THEN("when making requests and marking down hosts.") { REQUIRE(nhf.strategies_loaded == true); @@ -240,48 +274,58 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'rr-ip'", "[NextHopRound // call and test parentExists(), this call should not affect // findNextHop() round robin strict results - CHECK(strategy->nextHopExists(29000) == true); + CHECK(strategy->nextHopExists(txnp) == true); // first request. - memcpy(&rdata.client_ip, &sa1, sizeof(sa1)); - ParentResult result; - strategy->findNextHop(30000, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p4.foo.com") == 0); + // memcpy(&rdata.client_ip, &sa1, sizeof(sa1)); + build_request(10016, &sm, &sa1, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p4.foo.com") == 0); // call and test parentExists(), this call should not affect // findNextHop round robin strict results. - CHECK(strategy->nextHopExists(29000) == true); + CHECK(strategy->nextHopExists(txnp) == true); // second request. - memcpy(&rdata.client_ip, &sa2, sizeof(sa2)); - result.reset(); - strategy->findNextHop(30001, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p3.foo.com") == 0); + // memcpy(&rdata.client_ip, &sa2, sizeof(sa2)); + build_request(10017, &sm, &sa2, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p3.foo.com") == 0); // call and test parentExists(), this call should not affect // findNextHop() round robin strict results - CHECK(strategy->nextHopExists(29000) == true); + CHECK(strategy->nextHopExists(txnp) == true); // third request with same client ip, result should still be p3 - result.reset(); - strategy->findNextHop(30002, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p3.foo.com") == 0); + build_request(10018, &sm, &sa2, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p3.foo.com") == 0); // call and test parentExists(), this call should not affect // findNextHop() round robin strict results. - CHECK(strategy->nextHopExists(29000) == true); + CHECK(strategy->nextHopExists(txnp) == true); // fourth request with same client ip and same result indicating a failure should result in p4 // being selected. - strategy->findNextHop(30003, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p4.foo.com") == 0); + build_request(10019, &sm, &sa2, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p4.foo.com") == 0); } } + br_destroy(sm); } } SCENARIO("Testing NextHopRoundRobin class, using policy 'latched'", "[NextHopRoundRobin]") { + // We need this to build a HdrHeap object in build_request(); + // No thread setup, forbid use of thread local allocators. + cmd_disable_pfreelist = true; + // Get all of the HTTP WKS items populated. + http_init(); + GIVEN("Loading the round-robin-tests.yaml config for round robin 'latched' tests.") { std::shared_ptr strategy; @@ -300,9 +344,9 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'latched'", "[NextHopRou WHEN("using a strategy having a 'latched' policy.") { - uint64_t fail_threshold = 1; - uint64_t retry_time = 1; - TestData rdata; + HttpSM sm; + ParentResult *result = &sm.t_state.parent_result; + TSHttpTxn txnp = reinterpret_cast(&sm); THEN("when making requests and marking down hosts.") { @@ -310,28 +354,33 @@ SCENARIO("Testing NextHopRoundRobin class, using policy 'latched'", "[NextHopRou REQUIRE(strategy != nullptr); // first request should select p3 - ParentResult result; - strategy->findNextHop(40000, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p3.foo.com") == 0); + build_request(10020, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p3.foo.com") == 0); // second request should select p3 - result.reset(); - strategy->findNextHop(40001, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p3.foo.com") == 0); + build_request(10021, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p3.foo.com") == 0); // third request, use previous result to simulate a failure, we should now select p4. - strategy->findNextHop(40002, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p4.foo.com") == 0); + build_request(10022, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p4.foo.com") == 0); // fourth request we should be latched on p4 - result.reset(); - strategy->findNextHop(40003, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p4.foo.com") == 0); + build_request(10023, &sm, nullptr, "rabbit.net", nullptr); + result->reset(); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p4.foo.com") == 0); // fifth request, use previous result to simulate a failure, we should now select p3. - strategy->findNextHop(40004, result, rdata, fail_threshold, retry_time); - CHECK(strcmp(result.hostname, "p3.foo.com") == 0); + build_request(10024, &sm, nullptr, "rabbit.net", nullptr); + strategy->findNextHop(txnp); + CHECK(strcmp(result->hostname, "p3.foo.com") == 0); } + br_destroy(sm); } } }