From 33c157b8c77e173dde487f41a91d6a5305fe49f9 Mon Sep 17 00:00:00 2001 From: Sudheer Vinukonda Date: Tue, 22 Oct 2019 19:16:21 -0700 Subject: [PATCH] Enhance Connection Collapse in ATS core Add an option to support cache open read retry on a write lock failure. With this option, as long as read-while-writer is set up following the guidelines in the docs, there should be no need for any plugins to augment the core. Eventual plan is to deprecate collapsed_forwarding plugin with this new support. For more context on this, see https://cwiki.apache.org/confluence/display/TS/Presentations+-+2019?preview=/112821251/132320653/Collapsed%20Forwarding%20.pdf --- doc/admin-guide/files/records.config.en.rst | 9 ++++ proxy/http/HttpCacheSM.cc | 55 +++++++++++++++------ proxy/http/HttpSM.cc | 9 ++++ proxy/http/HttpTransact.cc | 22 ++++++--- proxy/http/HttpTransact.h | 1 + 5 files changed, 76 insertions(+), 20 deletions(-) diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst index b2ca3945283..1c278681068 100644 --- a/doc/admin-guide/files/records.config.en.rst +++ b/doc/admin-guide/files/records.config.en.rst @@ -2185,6 +2185,9 @@ all the different user-agent versions of documents it encounters. The number of times to attempt a cache open write upon failure to get a write lock. + This config is ignored when :ts:cv:`proxy.config.http.cache.open_write_fail_action` is + set to ``5``. + .. ts:cv:: CONFIG proxy.config.http.cache.open_write_fail_action INT 0 :reloadable: :overridable: @@ -2207,6 +2210,12 @@ all the different user-agent versions of documents it encounters. :ts:cv:`proxy.config.http.cache.max_stale_age`. Otherwise, go to origin server. ``4`` Return a ``502`` error on either a cache miss or on a revalidation. + ``5`` Retry Cache Read on a Cache Write Lock failure. This option together + with `proxy.config.cache.enable_read_while_writer` configuration + allows to collapse concurrent requests without a need for any plugin. + Make sure to configure Read While Writer feature correctly following + the docs in Cache Basics section. Note that this option may result in + CACHE_LOOKUP_COMPLETE HOOK being called back more than once. ===== ====================================================================== Customizable User Response Pages diff --git a/proxy/http/HttpCacheSM.cc b/proxy/http/HttpCacheSM.cc index 52190bc4c17..daf39c10d56 100644 --- a/proxy/http/HttpCacheSM.cc +++ b/proxy/http/HttpCacheSM.cc @@ -175,7 +175,8 @@ HttpCacheSM::state_cache_open_write(int event, void *data) { STATE_ENTER(&HttpCacheSM::state_cache_open_write, event); ink_assert(captive_action.cancelled == 0); - pending_action = nullptr; + pending_action = nullptr; + bool read_retry_on_write_fail = false; switch (event) { case CACHE_EVENT_OPEN_WRITE: @@ -187,9 +188,26 @@ HttpCacheSM::state_cache_open_write(int event, void *data) break; case CACHE_EVENT_OPEN_WRITE_FAILED: - if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) { + if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) { + // fall back to open_read_tries + // Note that when CACHE_WL_FAIL_ACTION_READ_RETRY is configured, max_cache_open_write_retries + // is automatically ignored. Make sure to not disable max_cache_open_read_retries + // with CACHE_WL_FAIL_ACTION_READ_RETRY as this results in proxy'ing to origin + // without write retries in both a cache miss or a cache refresh scenario. + if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) { + Debug("http_cache", "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. read retry triggered", + master_sm->sm_id, open_write_tries); + open_read_tries = 0; + read_retry_on_write_fail = true; + // make sure it doesn't loop indefinitely + open_write_tries = master_sm->t_state.txn_conf->max_cache_open_write_retries + 1; + } + } + if (read_retry_on_write_fail || open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) { // Retry open write; open_write_cb = false; + // reset captive_action since HttpSM cancelled it + captive_action.cancelled = 0; do_schedule_in(); } else { // The cache is hosed or full or something. @@ -204,18 +222,27 @@ HttpCacheSM::state_cache_open_write(int event, void *data) break; case EVENT_INTERVAL: - // Retry the cache open write if the number retries is less - // than or equal to the max number of open write retries - ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries); - Debug("http_cache", - "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. " - "retrying cache open write...", - master_sm->sm_id, open_write_tries); - - open_write( - &cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read, - (time_t)((master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for), - retry_write, false); + if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) { + Debug("http_cache", + "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. " + "falling back to read retry...", + master_sm->sm_id, open_write_tries); + open_read_cb = false; + master_sm->handleEvent(CACHE_EVENT_OPEN_READ, data); + } else { + Debug("http_cache", + "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. " + "retrying cache open write...", + master_sm->sm_id, open_write_tries); + + // Retry the cache open write if the number retries is less + // than or equal to the max number of open write retries + ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries); + open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read, + static_cast( + (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for), + retry_write, false); + } break; default: diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc index b6d03b5a668..aaf548e920f 100644 --- a/proxy/http/HttpSM.cc +++ b/proxy/http/HttpSM.cc @@ -2392,6 +2392,15 @@ HttpSM::state_cache_open_write(int event, void *data) // INTENTIONAL FALL THROUGH // Allow for stale object to be served case CACHE_EVENT_OPEN_READ: + if (!t_state.cache_info.object_read) { + t_state.cache_open_write_fail_action = t_state.txn_conf->cache_open_write_fail_action; + // Note that CACHE_LOOKUP_COMPLETE may be invoked more than once + // if CACHE_WL_FAIL_ACTION_READ_RETRY is configured + ink_assert(t_state.cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY); + t_state.cache_lookup_result = HttpTransact::CACHE_LOOKUP_NONE; + t_state.cache_info.write_lock_state = HttpTransact::CACHE_WL_READ_RETRY; + break; + } // The write vector was locked and the cache_sm retried // and got the read vector again. cache_sm.cache_read_vc->get_http_info(&t_state.cache_info.object_read); diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 10b5548155d..e1c1a5cc1a0 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -2874,7 +2874,6 @@ HttpTransact::handle_cache_write_lock(State *s) } TRANSACT_RETURN(SM_ACTION_SEND_ERROR_CACHE_NOOP, nullptr); - return; default: s->cache_info.write_status = CACHE_WRITE_LOCK_MISS; remove_ims = true; @@ -2882,14 +2881,25 @@ HttpTransact::handle_cache_write_lock(State *s) } break; case CACHE_WL_READ_RETRY: - // Write failed but retried and got a vector to read - // We need to clean up our state so that transact does - // not assert later on. Then handle the open read hit - // s->request_sent_time = UNDEFINED_TIME; s->response_received_time = UNDEFINED_TIME; s->cache_info.action = CACHE_DO_LOOKUP; - remove_ims = true; + if (!s->cache_info.object_read) { + // Write failed and read retry triggered + // Clean up server_request and re-initiate + // Cache Lookup + ink_assert(s->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY); + s->cache_info.write_status = CACHE_WRITE_LOCK_MISS; + StateMachineAction_t next; + next = SM_ACTION_CACHE_LOOKUP; + s->next_action = next; + s->hdr_info.server_request.destroy(); + TRANSACT_RETURN(next, nullptr); + } + // Write failed but retried and got a vector to read + // We need to clean up our state so that transact does + // not assert later on. Then handle the open read hit + remove_ims = true; SET_VIA_STRING(VIA_DETAIL_CACHE_TYPE, VIA_DETAIL_CACHE); break; case CACHE_WL_INIT: diff --git a/proxy/http/HttpTransact.h b/proxy/http/HttpTransact.h index e3ee61b28cb..a5b8bdc56d3 100644 --- a/proxy/http/HttpTransact.h +++ b/proxy/http/HttpTransact.h @@ -249,6 +249,7 @@ class HttpTransact CACHE_WL_FAIL_ACTION_STALE_ON_REVALIDATE = 0x02, CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_STALE_ON_REVALIDATE = 0x03, CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_OR_REVALIDATE = 0x04, + CACHE_WL_FAIL_ACTION_READ_RETRY = 0x05, TOTAL_CACHE_WL_FAIL_ACTION_TYPES };