diff --git a/doc/admin-guide/logging/formatting.en.rst b/doc/admin-guide/logging/formatting.en.rst index 48ce9402a87..0a95f383d7f 100644 --- a/doc/admin-guide/logging/formatting.en.rst +++ b/doc/admin-guide/logging/formatting.en.rst @@ -151,6 +151,7 @@ Cache Details .. _crc: .. _crsc: .. _chm: +.. _ckh: .. _cwr: .. _cwtr: .. _crra: @@ -166,6 +167,10 @@ Field Source Description cluc Client Request Cache Lookup URL, also known as the :term:`cache key`, which is the canonicalized version of the client request URL. +ckh Proxy Cache Cache Key Hash. The base64-encoded cryptographic hash of the + effective cache key used for cache lookup and storage. This + is the actual key used to index cache objects. Empty + (``-``) when no cache lookup was performed. crc Proxy Cache Cache Result Code. The result of |TS| attempting to obtain the object from cache; :ref:`admin-logging-cache-results`. crsc Proxy Cache Cache Result Sub-Code. More specific code to complement the diff --git a/doc/developer-guide/api/functions/TSHttpTxnCacheKeyDigestGet.en.rst b/doc/developer-guide/api/functions/TSHttpTxnCacheKeyDigestGet.en.rst new file mode 100644 index 00000000000..28d8ea3fc91 --- /dev/null +++ b/doc/developer-guide/api/functions/TSHttpTxnCacheKeyDigestGet.en.rst @@ -0,0 +1,64 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed + with this work for additional information regarding copyright + ownership. The ASF licenses this file to you under the Apache + License, Version 2.0 (the "License"); you may not use this file + except in compliance with the License. You may obtain a copy of + the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied. 
See the License for the specific language governing + permissions and limitations under the License. + +.. include:: ../../../common.defs + +.. default-domain:: cpp + +TSHttpTxnCacheKeyDigestGet +************************** + +Synopsis +======== + +.. code-block:: cpp + + #include <ts/ts.h> + +.. function:: TSReturnCode TSHttpTxnCacheKeyDigestGet(TSHttpTxn txnp, char *buffer, int *length) + +Description +=========== + +Get the effective cache key digest (cryptographic hash) that was used for +cache lookup or storage on this transaction. These are the raw hash bytes, +not a hex or base64 encoding. + +The digest size depends on the build configuration: 16 bytes for MD5 +(default) or 32 bytes for SHA-256 (FIPS mode). A 32-byte buffer is +sufficient for either mode: + +.. code-block:: c + + char digest[32]; + int digest_len = sizeof(digest); + if (TSHttpTxnCacheKeyDigestGet(txnp, digest, &digest_len) == TS_SUCCESS) { + // digest_len contains the actual number of bytes written + } + +Pass :code:`nullptr` for *buffer* to query the digest size without +copying. + +Returns :enumerator:`TS_SUCCESS` if a cache key was computed for the +transaction. Returns :enumerator:`TS_ERROR` if no cache lookup was +performed, or if *buffer* is non-null and *\*length* is smaller than the +digest size. In all cases *\*length* is set to the required digest size +on return. + +See Also +======== + +:func:`TSHttpTxnCacheLookupUrlGet` diff --git a/include/proxy/http/HttpCacheSM.h b/include/proxy/http/HttpCacheSM.h index 41623103d65..b1379e0555e 100644 --- a/include/proxy/http/HttpCacheSM.h +++ b/include/proxy/http/HttpCacheSM.h @@ -129,6 +129,12 @@ class HttpCacheSM : public Continuation return cache_read_vc ?
(cache_read_vc->is_compressed_in_ram()) : false; } + const HttpCacheKey & + get_cache_key() const + { + return cache_key; + } + void set_open_read_tries(int value) { diff --git a/include/proxy/logging/LogAccess.h b/include/proxy/logging/LogAccess.h index e449b674b2c..51d80471f85 100644 --- a/include/proxy/logging/LogAccess.h +++ b/include/proxy/logging/LogAccess.h @@ -254,6 +254,7 @@ class LogAccess // int marshal_cache_write_code(char *); // INT int marshal_cache_write_transform_code(char *); // INT + int marshal_cache_key_hash(char *); // STR // other fields // diff --git a/include/proxy/logging/TransactionLogData.h b/include/proxy/logging/TransactionLogData.h index 0c1eefd53a1..0db209e5a20 100644 --- a/include/proxy/logging/TransactionLogData.h +++ b/include/proxy/logging/TransactionLogData.h @@ -75,8 +75,9 @@ class TransactionLogData int get_unmapped_url_len() const; // ===== Cache lookup URL ===== - char *get_cache_lookup_url_str() const; - int get_cache_lookup_url_len() const; + char *get_cache_lookup_url_str() const; + int get_cache_lookup_url_len() const; + const ts::CryptoHash *get_cache_lookup_hash() const; // ===== Client addressing ===== sockaddr const *get_client_addr() const; diff --git a/include/ts/ts.h b/include/ts/ts.h index 9b62d64fe8c..2376a70d51e 100644 --- a/include/ts/ts.h +++ b/include/ts/ts.h @@ -2768,6 +2768,25 @@ TSReturnCode TSHttpTxnCachedRespModifiableGet(TSHttpTxn txnp, TSMBuffer *bufp, T TSReturnCode TSHttpTxnCacheLookupStatusSet(TSHttpTxn txnp, int cachelookup); TSReturnCode TSHttpTxnCacheLookupUrlGet(TSHttpTxn txnp, TSMBuffer bufp, TSMLoc obj); TSReturnCode TSHttpTxnCacheLookupUrlSet(TSHttpTxn txnp, TSMBuffer bufp, TSMLoc obj); + +/** + Gets the effective cache key digest (cryptographic hash) that was + used for cache lookup or storage on this transaction. The digest + is returned as raw bytes — 16 bytes for MD5 (default) or 32 bytes + for SHA-256 (FIPS mode). 
A buffer of at least 32 bytes is + recommended to accommodate either configuration. + + @param txnp the transaction. + @param buffer caller-provided buffer to receive the raw hash bytes. + If @c nullptr, only @a length is set (size query). + @param length in: capacity of @a buffer in bytes. + out: actual digest size in bytes. + + @return @c TS_SUCCESS if a cache key was computed for this + transaction, @c TS_ERROR if no cache lookup was performed or if + @a buffer is non-null and too small. + */ +TSReturnCode TSHttpTxnCacheKeyDigestGet(TSHttpTxn txnp, char *buffer, int *length); TSReturnCode TSHttpTxnPrivateSessionSet(TSHttpTxn txnp, int private_session); const char *TSHttpTxnCacheDiskPathGet(TSHttpTxn txnp, int *length); int TSHttpTxnBackgroundFillStarted(TSHttpTxn txnp); diff --git a/src/api/InkAPI.cc b/src/api/InkAPI.cc index 2f62c45f755..25308b0532e 100644 --- a/src/api/InkAPI.cc +++ b/src/api/InkAPI.cc @@ -4477,6 +4477,37 @@ TSHttpTxnCacheLookupUrlSet(TSHttpTxn txnp, TSMBuffer bufp, TSMLoc obj) return TS_SUCCESS; } +TSReturnCode +TSHttpTxnCacheKeyDigestGet(TSHttpTxn txnp, char *buffer, int *length) +{ + sdk_assert(sdk_sanity_check_txn(txnp) == TS_SUCCESS); + sdk_assert(length != nullptr); + + HttpSM *sm = reinterpret_cast<HttpSM *>(txnp); + const CryptoHash &hash = sm->get_cache_sm().get_cache_key().hash; + constexpr int size = CRYPTO_HASH_SIZE; + + if (hash.is_zero()) { + // No cache key was computed; still report the required digest size to the caller. + *length = size; + return TS_ERROR; + } + + if (buffer == nullptr) { + *length = size; + return TS_SUCCESS; + } + + if (*length < size) { + *length = size; + return TS_ERROR; + } + + memcpy(buffer, hash.u8, size); + *length = size; + return TS_SUCCESS; +} + /** * timeout is in msec * overrides as proxy.config.http.transaction_active_timeout_out diff --git a/src/proxy/logging/Log.cc b/src/proxy/logging/Log.cc index 49a21978b70..9a0ebd1eb11 100644 --- a/src/proxy/logging/Log.cc +++ b/src/proxy/logging/Log.cc @@ -505,6 +505,10 @@ Log::init_fields() global_field_list.add(field, false); field_symbol_hash.emplace("cluc",
field); + field = new LogField("cache_key_hash", "ckh", LogField::STRING, &LogAccess::marshal_cache_key_hash, &LogAccess::unmarshal_str); + global_field_list.add(field, false); + field_symbol_hash.emplace("ckh", field); + field = new LogField("client_sni_server_name", "cssn", LogField::STRING, &LogAccess::marshal_client_sni_server_name, &LogAccess::unmarshal_str); global_field_list.add(field, false); diff --git a/src/proxy/logging/LogAccess.cc b/src/proxy/logging/LogAccess.cc index 91b8135561a..2bb9ed83cfb 100644 --- a/src/proxy/logging/LogAccess.cc +++ b/src/proxy/logging/LogAccess.cc @@ -35,6 +35,7 @@ #include "swoc/BufferWriter.h" #include "tscore/Encoding.h" #include "tscore/ink_inet.h" +#include "tscore/ink_base64.h" char INVALID_STR[] = "!INVALID_STR!"; @@ -3030,6 +3031,37 @@ LogAccess::marshal_cache_write_transform_code(char *buf) return INK_MIN_ALIGN; } +/*------------------------------------------------------------------------- + Marshal the cache key hash as a base64 string ("-" when the hash is unset). + When buf is null nothing is written and only the padded length is returned. + -------------------------------------------------------------------------*/ + +int +LogAccess::marshal_cache_key_hash(char *buf) +{ + const ts::CryptoHash *hash = m_data->get_cache_lookup_hash(); + + if (!hash || hash->is_zero()) { + if (buf) { + marshal_str(buf, "-", padded_length(2)); + } + return padded_length(2); + } + + constexpr size_t b64_bufsize = ats_base64_encode_dstlen(CRYPTO_HASH_SIZE); + char b64_str[b64_bufsize]; + size_t b64_len = 0; + + ats_base64_encode(reinterpret_cast<const char *>(hash->u8), CRYPTO_HASH_SIZE, b64_str, b64_bufsize, &b64_len); + + int len = padded_length(b64_len + 1); + + if (buf) { + marshal_str(buf, b64_str, len); + } + return len; +} + /*------------------------------------------------------------------------- -------------------------------------------------------------------------*/ diff --git a/src/proxy/logging/TransactionLogData.cc b/src/proxy/logging/TransactionLogData.cc index af44b5e374b..b1b50e3730d 100644 --- a/src/proxy/logging/TransactionLogData.cc +++ b/src/proxy/logging/TransactionLogData.cc
@@ -366,6 +366,16 @@ TransactionLogData::get_cache_lookup_url_len() const return 0; } +const ts::CryptoHash * +TransactionLogData::get_cache_lookup_hash() const +{ + if (likely(m_http_sm != nullptr)) { + return &(m_http_sm->get_cache_sm().get_cache_key().hash); + } + + return nullptr; +} + // ===== Client addressing ===== sockaddr const * diff --git a/tests/gold_tests/logging/log-milestone-fields.test.py b/tests/gold_tests/logging/log-milestone-fields.test.py index e2a47ccb346..72f5f8617b0 100644 --- a/tests/gold_tests/logging/log-milestone-fields.test.py +++ b/tests/gold_tests/logging/log-milestone-fields.test.py @@ -39,9 +39,9 @@ class MilestoneFieldsTest: - No epoch-length garbage values (> 1_000_000_000) """ - # All Phase 1 msdms fields plus ms and cache result code for identification. + # All Phase 1 msdms fields plus ms, cache result code, and cache key hash. LOG_FORMAT = ( - 'crc=%<crc> ms=%<ms>' + 'crc=%<crc> ckh=%<ckh> ms=%<ms>' ' c_ttfb=%<{TS_MILESTONE_UA_BEGIN_WRITE-TS_MILESTONE_SM_START}msdms>' ' c_tls=%<{TS_MILESTONE_TLS_HANDSHAKE_END-TS_MILESTONE_TLS_HANDSHAKE_START}msdms>' ' c_hdr=%<{TS_MILESTONE_UA_READ_HEADER_DONE-TS_MILESTONE_SM_START}msdms>' diff --git a/tests/gold_tests/logging/verify_milestone_fields.py b/tests/gold_tests/logging/verify_milestone_fields.py index 419b8ccc9a3..c64af28a568 100644 --- a/tests/gold_tests/logging/verify_milestone_fields.py +++ b/tests/gold_tests/logging/verify_milestone_fields.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 ''' -Validate milestone timing fields in an ATS log file. +Validate milestone timing fields and cache key hash in an ATS log file.
Parses key=value log lines and checks: - All expected fields are present @@ -9,6 +9,8 @@ - Cache miss lines have ms > 0 and origin-phase fields populated - Cache hit lines have hit_proc and hit_xfer populated - The miss-path chain sums to approximately c_ttfb + - Cache key hash (ckh) is a valid base64 string on every line + - Cache key hash is identical between miss and hit for the same URL ''' # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -26,10 +28,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re import sys ALL_FIELDS = [ 'crc', + 'ckh', 'ms', 'c_ttfb', 'c_tls', @@ -47,7 +51,7 @@ 'hit_xfer', ] -TIMING_FIELDS = [f for f in ALL_FIELDS if f != 'crc'] +TIMING_FIELDS = [f for f in ALL_FIELDS if f not in ('crc', 'ckh')] # Fields that form the contiguous miss-path chain to c_ttfb: # c_ttfb = c_hdr + c_proc + cache + dns + o_conn + o_wait + o_hdr + o_proc @@ -77,6 +81,15 @@ def validate_line(fields: dict[str, str], line_num: int) -> list[str]: if name not in fields: errors.append(f'line {line_num}: missing field "{name}"') + ckh = fields.get('ckh') + if ckh is not None: + if ckh == '-': + errors.append(f'line {line_num}: ckh should not be "-" (cache lookup was performed)') + elif not re.fullmatch(r'[A-Za-z0-9+/]+=*', ckh): + errors.append(f'line {line_num}: ckh is not valid base64: {ckh!r}') + elif len(ckh) not in (24, 44): + errors.append(f'line {line_num}: ckh has unexpected length {len(ckh)} (expected 24 or 44)') + for name in TIMING_FIELDS: val_str = fields.get(name) if val_str is None: @@ -183,6 +196,7 @@ def main(): all_errors = [] miss_found = False hit_found = False + cache_key_hashes = set() for i, line in enumerate(lines, start=1): fields = parse_line(line) @@ -191,6 +205,9 @@ def main(): miss_found = True if 'HIT' in crc and 'MISS' not in crc: hit_found = True + ckh = fields.get('ckh') + if ckh and ckh != '-': + 
cache_key_hashes.add(ckh) errors = validate_line(fields, i) all_errors.extend(errors) @@ -198,6 +215,10 @@ def main(): all_errors.append('No cache miss line found in log') if not hit_found: all_errors.append('No cache hit line found in log') + if len(cache_key_hashes) != 1: + all_errors.append( + f'Expected identical cache key hash on all lines, got {len(cache_key_hashes)} ' + f'distinct values: {cache_key_hashes}') if all_errors: for err in all_errors: