From ebd4d96719957b7fd8c384041e153df4c84a3087 Mon Sep 17 00:00:00 2001 From: Sudheer Vinukonda Date: Tue, 22 Oct 2019 19:16:21 -0700 Subject: [PATCH] Enhance Connection Collapse in ATS core Add an option to support cache open read retry on a write lock failure. With this option, as long as read-while-writer is set up following the guidelines in the docs, there should be no need for any plugins to augment the core. Eventual plan is to deprecate collapsed_forwarding plugin with this new support. For more context on this, see https://cwiki.apache.org/confluence/display/TS/Presentations+-+2019?preview=/112821251/132320653/Collapsed%20Forwarding%20.pdf --- doc/admin-guide/files/records.config.en.rst | 9 ++++ proxy/http/HttpCacheSM.cc | 55 +++++++++++++++------ proxy/http/HttpSM.cc | 9 ++++ proxy/http/HttpTransact.cc | 22 ++++++--- proxy/http/HttpTransact.h | 1 + 5 files changed, 76 insertions(+), 20 deletions(-) diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst index ea7dfb8642a..e86b7429eed 100644 --- a/doc/admin-guide/files/records.config.en.rst +++ b/doc/admin-guide/files/records.config.en.rst @@ -2289,6 +2289,9 @@ Dynamic Content & Content Negotiation The number of times to attempt a cache open write upon failure to get a write lock. + This config is ignored when :ts:cv:`proxy.config.http.cache.open_write_fail_action` is + set to ``5``. + .. ts:cv:: CONFIG proxy.config.http.cache.open_write_fail_action INT 0 :reloadable: :overridable: @@ -2311,6 +2314,12 @@ Dynamic Content & Content Negotiation :ts:cv:`proxy.config.http.cache.max_stale_age`. Otherwise, go to origin server. ``4`` Return a ``502`` error on either a cache miss or on a revalidation. + ``5`` Retry Cache Read on a Cache Write Lock failure. This option together + with `proxy.config.cache.enable_read_while_writer` configuration + allows to collapse concurrent requests without a need for any plugin. + Make sure to configure Read While Writer feature correctly following + the docs in Cache Basics section. Note that this option may result in + CACHE_LOOKUP_COMPLETE HOOK being called back more than once. ===== ====================================================================== Customizable User Response Pages diff --git a/proxy/http/HttpCacheSM.cc b/proxy/http/HttpCacheSM.cc index 731ebf2ee76..0832dbc54f6 100644 --- a/proxy/http/HttpCacheSM.cc +++ b/proxy/http/HttpCacheSM.cc @@ -159,7 +159,8 @@ HttpCacheSM::state_cache_open_write(int event, void *data) { STATE_ENTER(&HttpCacheSM::state_cache_open_write, event); ink_assert(captive_action.cancelled == 0); - pending_action = nullptr; + pending_action = nullptr; + bool read_retry_on_write_fail = false; switch (event) { case CACHE_EVENT_OPEN_WRITE: @@ -171,9 +172,26 @@ HttpCacheSM::state_cache_open_write(int event, void *data) break; case CACHE_EVENT_OPEN_WRITE_FAILED: - if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) { + if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) { + // fall back to open_read_tries + // Note that when CACHE_WL_FAIL_ACTION_READ_RETRY is configured, max_cache_open_write_retries + // is automatically ignored. Make sure to not disable max_cache_open_read_retries + // with CACHE_WL_FAIL_ACTION_READ_RETRY as this results in proxy'ing to origin + // without write retries in both a cache miss or a cache refresh scenario. + if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) { + Debug("http_cache", "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. read retry triggered", + master_sm->sm_id, open_write_tries); + open_read_tries = 0; + read_retry_on_write_fail = true; + // make sure it doesn't loop indefinitely + open_write_tries = master_sm->t_state.txn_conf->max_cache_open_write_retries + 1; + } + } + if (read_retry_on_write_fail || open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) { // Retry open write; open_write_cb = false; + // reset captive_action since HttpSM cancelled it + captive_action.cancelled = 0; do_schedule_in(); } else { // The cache is hosed or full or something. @@ -188,18 +206,27 @@ HttpCacheSM::state_cache_open_write(int event, void *data) break; case EVENT_INTERVAL: - // Retry the cache open write if the number retries is less - // than or equal to the max number of open write retries - ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries); - Debug("http_cache", - "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. " - "retrying cache open write...", - master_sm->sm_id, open_write_tries); - - open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read, - static_cast( - (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for), - retry_write, false); + if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) { + Debug("http_cache", + "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. " + "falling back to read retry...", + master_sm->sm_id, open_write_tries); + open_read_cb = false; + master_sm->handleEvent(CACHE_EVENT_OPEN_READ, data); + } else { + Debug("http_cache", + "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. " + "retrying cache open write...", + master_sm->sm_id, open_write_tries); + + // Retry the cache open write if the number retries is less + // than or equal to the max number of open write retries + ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries); + open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read, + static_cast( + (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for), + retry_write, false); + } break; default: diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc index 98482ce61b0..c71be4aafb8 100644 --- a/proxy/http/HttpSM.cc +++ b/proxy/http/HttpSM.cc @@ -2456,6 +2456,15 @@ HttpSM::state_cache_open_write(int event, void *data) // INTENTIONAL FALL THROUGH // Allow for stale object to be served case CACHE_EVENT_OPEN_READ: + if (!t_state.cache_info.object_read) { + t_state.cache_open_write_fail_action = t_state.txn_conf->cache_open_write_fail_action; + // Note that CACHE_LOOKUP_COMPLETE may be invoked more than once + // if CACHE_WL_FAIL_ACTION_READ_RETRY is configured + ink_assert(t_state.cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY); + t_state.cache_lookup_result = HttpTransact::CACHE_LOOKUP_NONE; + t_state.cache_info.write_lock_state = HttpTransact::CACHE_WL_READ_RETRY; + break; + } // The write vector was locked and the cache_sm retried // and got the read vector again. cache_sm.cache_read_vc->get_http_info(&t_state.cache_info.object_read); diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index f5898f01676..920b23bab8f 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -2896,7 +2896,6 @@ HttpTransact::handle_cache_write_lock(State *s) } TRANSACT_RETURN(SM_ACTION_SEND_ERROR_CACHE_NOOP, nullptr); - return; default: s->cache_info.write_status = CACHE_WRITE_LOCK_MISS; remove_ims = true; @@ -2904,14 +2903,25 @@ HttpTransact::handle_cache_write_lock(State *s) } break; case CACHE_WL_READ_RETRY: - // Write failed but retried and got a vector to read - // We need to clean up our state so that transact does - // not assert later on. Then handle the open read hit - // s->request_sent_time = UNDEFINED_TIME; s->response_received_time = UNDEFINED_TIME; s->cache_info.action = CACHE_DO_LOOKUP; - remove_ims = true; + if (!s->cache_info.object_read) { + // Write failed and read retry triggered + // Clean up server_request and re-initiate + // Cache Lookup + ink_assert(s->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY); + s->cache_info.write_status = CACHE_WRITE_LOCK_MISS; + StateMachineAction_t next; + next = SM_ACTION_CACHE_LOOKUP; + s->next_action = next; + s->hdr_info.server_request.destroy(); + TRANSACT_RETURN(next, nullptr); + } + // Write failed but retried and got a vector to read + // We need to clean up our state so that transact does + // not assert later on. Then handle the open read hit + remove_ims = true; SET_VIA_STRING(VIA_DETAIL_CACHE_TYPE, VIA_DETAIL_CACHE); break; case CACHE_WL_INIT: diff --git a/proxy/http/HttpTransact.h b/proxy/http/HttpTransact.h index f7bfef58cad..68ec711e93e 100644 --- a/proxy/http/HttpTransact.h +++ b/proxy/http/HttpTransact.h @@ -251,6 +251,7 @@ class HttpTransact CACHE_WL_FAIL_ACTION_STALE_ON_REVALIDATE = 0x02, CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_STALE_ON_REVALIDATE = 0x03, CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_OR_REVALIDATE = 0x04, + CACHE_WL_FAIL_ACTION_READ_RETRY = 0x05, TOTAL_CACHE_WL_FAIL_ACTION_TYPES };