diff --git a/doc/admin-guide/plugins/cache_promote.en.rst b/doc/admin-guide/plugins/cache_promote.en.rst index ea882fb8dce..7e3a74926de 100644 --- a/doc/admin-guide/plugins/cache_promote.en.rst +++ b/doc/admin-guide/plugins/cache_promote.en.rst @@ -21,7 +21,7 @@ Cache Promote Plugin ******************** The :program:`cache_promote` plugin provides a means to control when an object should -be allowed to enter the cache. This is orthogonal from normal Cache-Control +be allowed to enter the cache. This is orthogonal to normal ``Cache-Control`` directives, providing a different set of policies to apply. The typical use case for this plugin is when you have a very large data set, where you want to avoid churning the ATS cache for the long tail content. @@ -50,7 +50,13 @@ If :option:`--policy` is set to ``lru`` the following options are also available .. option:: --hits - The minimum number of hits before promotion. + The minimum number of requests before promotion. + +.. option:: --bytes + + In addition to requests, also count bytes that are cache misses. The default is + ``0``, which disables byte counting. If specified, whichever of the bytes and requests (hits) thresholds + is reached first will cause promotion. .. option:: --buckets diff --git a/plugins/cache_promote/cache_promote.cc b/plugins/cache_promote/cache_promote.cc index 79372917da3..aa110a101d9 100644 --- a/plugins/cache_promote/cache_promote.cc +++ b/plugins/cache_promote/cache_promote.cc @@ -25,6 +25,7 @@ #include "configs.h" const char *PLUGIN_NAME = "cache_promote"; +int TXN_ARG_IDX; // This has to be a global here. I tried doing a classic singleton (with a getInstance()) in the PolicyManager, // but then reloading the DSO does not work. 
What happens is that the old singleton is stil there, even though @@ -47,7 +48,7 @@ cont_handle_policy(TSCont contp, TSEvent event, void *edata) PromotionConfig *config = static_cast(TSContDataGet(contp)); switch (event) { - // Main HOOK + // After the cache lookups check if it should be promoted on cache misses case TS_EVENT_HTTP_CACHE_LOOKUP_COMPLETE: if (!TSHttpTxnIsInternal(txnp)) { int obj_status; @@ -60,6 +61,13 @@ cont_handle_policy(TSCont contp, TSEvent event, void *edata) TSDebug(PLUGIN_NAME, "cache-status is %d, and leaving cache on (promoted)", obj_status); } else { TSDebug(PLUGIN_NAME, "cache-status is %d, and turning off the cache (not promoted)", obj_status); + if (config->getPolicy()->countBytes()) { + // Need to schedule this continuation for read-response-header-hook as well. + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, contp); + // This is needed to make sure that we free any data retained in the TXN slot even if the + // transaction is terminated early. + TSHttpTxnHookAdd(txnp, TS_HTTP_TXN_CLOSE_HOOK, contp); + } TSHttpTxnServerRespNoStoreSet(txnp, 1); } break; @@ -67,21 +75,29 @@ cont_handle_policy(TSCont contp, TSEvent event, void *edata) // Do nothing, just let it handle the lookup. 
TSDebug(PLUGIN_NAME, "cache-status is %d (hit), nothing to do", obj_status); - if (config->getPolicy()->stats_enabled) { - TSStatIntIncrement(config->getPolicy()->cache_hits_id, 1); + if (config->getPolicy()->_stats_enabled) { + TSStatIntIncrement(config->getPolicy()->_cache_hits_id, 1); } break; } } - - if (config->getPolicy()->stats_enabled) { - TSStatIntIncrement(config->getPolicy()->total_requests_id, 1); + if (config->getPolicy()->_stats_enabled) { + TSStatIntIncrement(config->getPolicy()->_total_requests_id, 1); } } else { TSDebug(PLUGIN_NAME, "request is an internal (plugin) request, implicitly promoted"); } break; + // This is the event when we want to count the bytes cache miss as well as hits + case TS_EVENT_HTTP_READ_RESPONSE_HDR: + config->getPolicy()->addBytes(txnp); + break; + + case TS_EVENT_HTTP_TXN_CLOSE: + config->getPolicy()->cleanup(txnp); + break; + // Should not happen default: TSDebug(PLUGIN_NAME, "unhandled event %d", static_cast(event)); @@ -110,7 +126,13 @@ TSRemapInit(TSRemapInterface *api_info, char *errbuf, int errbuf_size) return TS_ERROR; } - TSDebug(PLUGIN_NAME, "remap plugin is successfully initialized"); + // Reserve a TXN slot for storing the calculated URL hash key + if (TS_SUCCESS != TSUserArgIndexReserve(TS_USER_ARGS_TXN, PLUGIN_NAME, "cache_promote URL hash key", &TXN_ARG_IDX)) { + strncpy(errbuf, "[tsremap_init] - Failed to reserve the TXN user argument slot", errbuf_size - 1); + return TS_ERROR; + } + + TSDebug(PLUGIN_NAME, "remap plugin is successfully initialized, TXN_IDX = %d", TXN_ARG_IDX); return TS_SUCCESS; /* success */ } diff --git a/plugins/cache_promote/chance_policy.h b/plugins/cache_promote/chance_policy.h index 649c5e7a238..d7b45a3683a 100644 --- a/plugins/cache_promote/chance_policy.h +++ b/plugins/cache_promote/chance_policy.h @@ -29,7 +29,7 @@ class ChancePolicy : public PromotionPolicy bool doPromote(TSHttpTxn /* txnp ATS_UNUSED */) override { TSDebug(PLUGIN_NAME, "ChancePolicy::doPromote(%f)", getSample()); 
- incrementStat(promoted_id, 1); + incrementStat(_promoted_id, 1); return true; } @@ -51,9 +51,9 @@ class ChancePolicy : public PromotionPolicy { std::string_view remap_identifier = remap_id; const std::tuple stats[] = { - {"cache_hits", &cache_hits_id}, - {"promoted", &promoted_id}, - {"total_requests", &total_requests_id}, + {"cache_hits", &_cache_hits_id}, + {"promoted", &_promoted_id}, + {"total_requests", &_total_requests_id}, }; if (nullptr == remap_id) { diff --git a/plugins/cache_promote/configs.cc b/plugins/cache_promote/configs.cc index 82e13029bba..0cea3dd7461 100644 --- a/plugins/cache_promote/configs.cc +++ b/plugins/cache_promote/configs.cc @@ -26,12 +26,13 @@ // to add to this list, making them more modular. static const struct option longopt[] = { {const_cast("policy"), required_argument, nullptr, 'p'}, + {const_cast("stats-enable-with-id"), required_argument, nullptr, 'e'}, // This is for both Chance and LRU (optional) policy {const_cast("sample"), required_argument, nullptr, 's'}, // For the LRU policy {const_cast("buckets"), required_argument, nullptr, 'b'}, {const_cast("hits"), required_argument, nullptr, 'h'}, - {const_cast("stats-enable-with-id"), required_argument, nullptr, 'e'}, + {const_cast("bytes"), required_argument, nullptr, 'B'}, {const_cast("label"), required_argument, nullptr, 'l'}, // EOF {nullptr, no_argument, nullptr, '\0'}, @@ -70,7 +71,7 @@ PromotionConfig::factory(int argc, char *argv[]) return false; } else { if (_policy && _policy->stats_add(optarg)) { - _policy->stats_enabled = true; + _policy->_stats_enabled = true; TSDebug(PLUGIN_NAME, "stats collection is enabled"); } } diff --git a/plugins/cache_promote/lru_policy.cc b/plugins/cache_promote/lru_policy.cc index 07eeef8b1f0..f012085cd5d 100644 --- a/plugins/cache_promote/lru_policy.cc +++ b/plugins/cache_promote/lru_policy.cc @@ -16,12 +16,49 @@ limitations under the License. 
*/ #include +#include #include "lru_policy.h" #define MINIMUM_BUCKET_SIZE 10 static LRUEntry NULL_LRU_ENTRY; // Used to create an "empty" new LRUEntry +// Initialize the LRU hash key from the TXN's URL +bool +LRUHash::initFromUrl(TSHttpTxn txnp) +{ + bool ret = false; + TSMLoc c_url = TS_NULL_MLOC; + TSMBuffer reqp; + TSMLoc req_hdr; + + if (TS_SUCCESS != TSHttpTxnClientReqGet(txnp, &reqp, &req_hdr)) { + return false; + } + + if (TS_SUCCESS == TSUrlCreate(reqp, &c_url)) { + if (TS_SUCCESS == TSHttpTxnCacheLookupUrlGet(txnp, reqp, c_url)) { + int url_len = 0; + char *url = TSUrlStringGet(reqp, c_url, &url_len); + + if (url && url_len > 0) { + SHA_CTX sha; + + SHA1_Init(&sha); + TSDebug(PLUGIN_NAME, "LRUHash::initFromUrl(%.*s%s)", url_len > 100 ? 100 : url_len, url, url_len > 100 ? "..." : ""); + SHA1_Update(&sha, url, url_len); + SHA1_Final(_hash, &sha); + TSfree(url); + ret = true; + } + } + TSHandleMLocRelease(reqp, TS_NULL_MLOC, c_url); + } + TSHandleMLocRelease(reqp, TS_NULL_MLOC, req_hdr); + + return ret; +} + LRUPolicy::~LRUPolicy() { TSDebug(PLUGIN_NAME, "LRUPolicy DTOR"); @@ -52,6 +89,9 @@ LRUPolicy::parseOption(int opt, char *optarg) case 'h': _hits = static_cast(strtol(optarg, nullptr, 10)); break; + case 'B': + _bytes = static_cast(strtoll(optarg, nullptr, 10)); + break; case 'l': _label = optarg; break; @@ -72,101 +112,126 @@ LRUPolicy::doPromote(TSHttpTxn txnp) { LRUHash hash; LRUMap::iterator map_it; - char *url = nullptr; - int url_len = 0; - bool ret = false; - TSMBuffer request; - TSMLoc req_hdr; - - if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &request, &req_hdr)) { - TSMLoc c_url = TS_NULL_MLOC; + bool ret = false; - // Get the cache key URL (for now), since this has better lookup behavior when using - // e.g. the cachekey plugin. 
- if (TS_SUCCESS == TSUrlCreate(request, &c_url)) { - if (TS_SUCCESS == TSHttpTxnCacheLookupUrlGet(txnp, request, c_url)) { - url = TSUrlStringGet(request, c_url, &url_len); - TSHandleMLocRelease(request, TS_NULL_MLOC, c_url); - } - } - TSHandleMLocRelease(request, TS_NULL_MLOC, req_hdr); - } - - // Generally shouldn't happen ... - if (!url) { + if (!hash.initFromUrl(txnp)) { return false; } - TSDebug(PLUGIN_NAME, "LRUPolicy::doPromote(%.*s%s)", url_len > 100 ? 100 : url_len, url, url_len > 100 ? "..." : ""); - hash.init(url, url_len); - TSfree(url); - // We have to hold the lock across all list and hash access / updates TSMutexLock(_lock); map_it = _map.find(&hash); if (_map.end() != map_it) { + auto &[map_key, map_val] = *map_it; + auto &[val_key, val_hits, val_bytes] = *(map_it->second); + + // This is because compilers before gcc 8 aren't smart enough to ignore the unused structured bindings + (void)val_key; + // We have an entry in the LRU TSAssert(_list_size > 0); // mismatch in the LRUs hash and list, shouldn't happen - incrementStat(lru_hit_id, 1); - if (++(map_it->second->second) >= _hits) { + incrementStat(_lru_hit_id, 1); + if (++val_hits >= _hits || (_bytes > 0 && val_bytes > _bytes)) { // Promoted! Cleanup the LRU, and signal success. Save the promoted entry on the freelist. 
TSDebug(PLUGIN_NAME, "saving the LRUEntry to the freelist"); - _freelist.splice(_freelist.begin(), _list, map_it->second); + _freelist.splice(_freelist.begin(), _list, map_val); ++_freelist_size; --_list_size; - _map.erase(map_it->first); - incrementStat(promoted_id, 1); - incrementStat(freelist_size_id, 1); - decrementStat(lru_size_id, 1); + _map.erase(map_key); + incrementStat(_promoted_id, 1); + incrementStat(_freelist_size_id, 1); + decrementStat(_lru_size_id, 1); ret = true; } else { // It's still not promoted, make sure it's moved to the front of the list - TSDebug(PLUGIN_NAME, "still not promoted, got %d hits so far", map_it->second->second); - _list.splice(_list.begin(), _list, map_it->second); + TSDebug(PLUGIN_NAME, "still not promoted, got %d hits so far and %" PRId64 " bytes", val_hits, val_bytes); + _list.splice(_list.begin(), _list, map_val); } } else { // New LRU entry for the URL, try to repurpose the list entry as much as possible - incrementStat(lru_miss_id, 1); + incrementStat(_lru_miss_id, 1); if (_list_size >= _buckets) { TSDebug(PLUGIN_NAME, "repurposing last LRUHash entry"); _list.splice(_list.begin(), _list, --_list.end()); - _map.erase(&(_list.begin()->first)); - incrementStat(lru_vacated_id, 1); + _map.erase(&(std::get<0>(*_list.begin()))); // Get the hash from the first list element + incrementStat(_lru_vacated_id, 1); } else if (_freelist_size > 0) { TSDebug(PLUGIN_NAME, "reusing LRUEntry from freelist"); _list.splice(_list.begin(), _freelist, _freelist.begin()); --_freelist_size; ++_list_size; - incrementStat(lru_size_id, 1); - decrementStat(freelist_size_id, 1); + incrementStat(_lru_size_id, 1); + decrementStat(_freelist_size_id, 1); } else { TSDebug(PLUGIN_NAME, "creating new LRUEntry"); _list.push_front(NULL_LRU_ENTRY); ++_list_size; - incrementStat(lru_size_id, 1); + incrementStat(_lru_size_id, 1); } // Update the "new" LRUEntry and add it to the hash - _list.begin()->first = hash; - _list.begin()->second = 1; - 
- _map[&(_list.begin()->first)] = _list.begin(); + *_list.begin() = {hash, 1, 0}; + _map[&(std::get<0>(*_list.begin()))] = _list.begin(); } TSMutexUnlock(_lock); + // If we didn't promote, and we want to count bytes, save away the calculated hash for later use + if (false == ret && countBytes()) { + TSUserArgSet(txnp, TXN_ARG_IDX, static_cast(new LRUHash(hash))); + } else { + TSUserArgSet(txnp, TXN_ARG_IDX, nullptr); + } + return ret; } +void +LRUPolicy::addBytes(TSHttpTxn txnp) +{ + LRUHash *hash = static_cast(TSUserArgGet(txnp, TXN_ARG_IDX)); + + if (hash) { + LRUMap::iterator map_it; + + // We have to hold the lock across all list and hash access / updates + TSMutexLock(_lock); + map_it = _map.find(hash); + if (_map.end() != map_it) { + TSMBuffer resp; + TSMLoc resp_hdr; + + if (TS_SUCCESS == TSHttpTxnServerRespGet(txnp, &resp, &resp_hdr)) { + TSMLoc field_loc = TSMimeHdrFieldFind(resp, resp_hdr, TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH); + + if (field_loc) { + auto &[val_key, val_hits, val_bytes] = *(map_it->second); + int64_t cl = TSMimeHdrFieldValueInt64Get(resp, resp_hdr, field_loc, -1); + + // This is because compilers before gcc 8 aren't smart enough to ignore the unused structured bindings + (void)val_key, (void)val_hits; + + val_bytes += cl; + TSDebug(PLUGIN_NAME, "Added %" PRId64 " bytes for LRU entry", cl); + TSHandleMLocRelease(resp, resp_hdr, field_loc); + } + TSHandleMLocRelease(resp, TS_NULL_MLOC, resp_hdr); + } + } + TSMutexUnlock(_lock); + } +} + bool LRUPolicy::stats_add(const char *remap_id) { std::string_view remap_identifier = remap_id; const std::tuple stats[] = { - {"cache_hits", &cache_hits_id}, {"freelist_size", &freelist_size_id}, - {"lru_size", &lru_size_id}, {"lru_hit", &lru_hit_id}, - {"lru_miss", &lru_miss_id}, {"lru_vacated", &lru_vacated_id}, - {"promoted", &promoted_id}, {"total_requests", &total_requests_id}, + {"cache_hits", &_cache_hits_id}, {"freelist_size", &_freelist_size_id}, + {"lru_size", &_lru_size_id}, 
{"lru_hit", &_lru_hit_id}, + {"lru_miss", &_lru_miss_id}, {"lru_vacated", &_lru_vacated_id}, + {"promoted", &_promoted_id}, {"total_requests", &_total_requests_id}, }; if (nullptr == remap_id) { diff --git a/plugins/cache_promote/lru_policy.h b/plugins/cache_promote/lru_policy.h index 390a9497b93..99151206e2e 100644 --- a/plugins/cache_promote/lru_policy.h +++ b/plugins/cache_promote/lru_policy.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "policy.h" @@ -40,6 +41,12 @@ class LRUHash LRUHash() { TSDebug(PLUGIN_NAME, "LRUHash() CTOR"); } ~LRUHash() { TSDebug(PLUGIN_NAME, "~LRUHash() DTOR"); } + LRUHash(const LRUHash &h) + { + TSDebug(PLUGIN_NAME, "Copy CTOR an LRUHash object"); + memcpy(_hash, h._hash, sizeof(_hash)); + } + LRUHash & operator=(const LRUHash &h) { @@ -50,15 +57,8 @@ class LRUHash return *this; } - void - init(char *data, int len) - { - SHA_CTX sha; - - SHA1_Init(&sha); - SHA1_Update(&sha, data, len); - SHA1_Final(_hash, &sha); - } + // Initialize the hash key from the TXN's URL + bool initFromUrl(TSHttpTxn txnp); private: u_char _hash[SHA_DIGEST_LENGTH]; @@ -78,9 +78,9 @@ struct LRUHashHasher { } }; -typedef std::pair LRUEntry; -using LRUList = std::list; -typedef std::unordered_map LRUMap; +using LRUEntry = std::tuple; +using LRUList = std::list; +using LRUMap = std::unordered_map; class LRUPolicy : public PromotionPolicy { @@ -91,11 +91,18 @@ class LRUPolicy : public PromotionPolicy bool parseOption(int opt, char *optarg) override; bool doPromote(TSHttpTxn txnp) override; bool stats_add(const char *remap_id) override; + void addBytes(TSHttpTxn txnp) override; + + bool + countBytes() const override + { + return _bytes > 0; + } void usage() const override { - TSError("[%s] Usage: @plugin=%s.so @pparam=--policy=lru @pparam=--buckets= --hits= --sample=", PLUGIN_NAME, + TSError("[%s] Usage: @plugin=%s.so @pparam=--policy=lru @pparam=--buckets= --hits= --bytes= --sample=

", PLUGIN_NAME, PLUGIN_NAME); } @@ -108,12 +115,26 @@ class LRUPolicy : public PromotionPolicy const std::string id() const override { - return _label + ";LRU=b:" + std::to_string(_buckets) + ",h:" + std::to_string(_hits); + return _label + ";LRU=b:" + std::to_string(_buckets) + ",h:" + std::to_string(_hits) + ",B:" + std::to_string(_bytes); + } + + void + cleanup(TSHttpTxn txnp) override + { + LRUHash *hash = static_cast(TSUserArgGet(txnp, TXN_ARG_IDX)); + + // Delete the hash, and remove the pointer from the TXN user arg slot (to be safe) + if (hash) { + delete hash; + TSUserArgSet(txnp, TXN_ARG_IDX, nullptr); + } } private: - unsigned _buckets = 1000; - unsigned _hits = 10; + unsigned _buckets = 1000; + unsigned _hits = 10; + int64_t _bytes = 0; + std::string _label = ""; // For the LRU. Note that we keep track of the List sizes, because some versions fo STL have broken // implementations of size(), making them obsessively slow on calling ::size(). @@ -123,10 +144,10 @@ class LRUPolicy : public PromotionPolicy size_t _list_size = 0, _freelist_size = 0; // internal stats ids - int freelist_size_id = -1; - int lru_size_id = -1; - int lru_hit_id = -1; - int lru_miss_id = -1; - int lru_vacated_id = -1; - int promoted_id = -1; + int _freelist_size_id = -1; + int _lru_size_id = -1; + int _lru_hit_id = -1; + int _lru_miss_id = -1; + int _lru_vacated_id = -1; + int _promoted_id = -1; }; diff --git a/plugins/cache_promote/policy.h b/plugins/cache_promote/policy.h index ef06f1bfea0..c0b37ee91a3 100644 --- a/plugins/cache_promote/policy.h +++ b/plugins/cache_promote/policy.h @@ -23,6 +23,8 @@ #include "ts/ts.h" #include "ts/remap.h" +extern int TXN_ARG_IDX; + #define MAX_STAT_LENGTH (1 << 8) extern const char *PLUGIN_NAME; @@ -57,7 +59,7 @@ class PromotionPolicy void decrementStat(const int stat, const int amount) { - if (stats_enabled) { + if (_stats_enabled) { TSStatIntDecrement(stat, amount); } } @@ -65,7 +67,7 @@ class PromotionPolicy void incrementStat(const int stat, 
const int amount) { - if (stats_enabled) { + if (_stats_enabled) { TSStatIntIncrement(stat, amount); } } @@ -82,6 +84,24 @@ class PromotionPolicy return ""; } + // Cleanup any internal state / memory that may be in use + virtual void + cleanup(TSHttpTxn txnp) + { + } + + // These are for any policy that also wants to count bytes as a promotion criterion + virtual bool + countBytes() const + { + return false; + } + + virtual void + addBytes(TSHttpTxn txnp) + { + } + bool doSample() const; int create_stat(std::string_view name, std::string_view remap_identifier); @@ -92,14 +112,11 @@ class PromotionPolicy virtual bool stats_add(const char *remap_id) = 0; // when true stats are incremented. - bool stats_enabled = false; - int cache_hits_id = -1; - int promoted_id = -1; - int total_requests_id = -1; + bool _stats_enabled = false; + int _cache_hits_id = -1; + int _promoted_id = -1; + int _total_requests_id = -1; private: float _sample = 0.0; - -protected: - std::string _label = ""; };