diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc index fb703be2036..cde8e989de8 100644 --- a/proxy/hdrs/HTTP.cc +++ b/proxy/hdrs/HTTP.cc @@ -1753,7 +1753,7 @@ class UrlPrintHack }; char * -HTTPHdr::url_string_get(Arena *arena, int *length, bool normalized) +HTTPHdr::url_string_get(Arena *arena, int *length) { char *zret = nullptr; UrlPrintHack hack(this); @@ -1763,14 +1763,13 @@ HTTPHdr::url_string_get(Arena *arena, int *length, bool normalized) // even uglier but it's less so than duplicating this entire method to // change that one thing. - zret = (arena == USE_HDR_HEAP_MAGIC) ? m_url_cached.string_get_ref(length, normalized) : - m_url_cached.string_get(arena, length, normalized); + zret = (arena == USE_HDR_HEAP_MAGIC) ? m_url_cached.string_get_ref(length) : m_url_cached.string_get(arena, length); } return zret; } int -HTTPHdr::url_print(char *buff, int length, int *offset, int *skip, bool normalized) +HTTPHdr::url_print(char *buff, int length, int *offset, int *skip, unsigned normalization_flags) { ink_release_assert(offset); ink_release_assert(skip); @@ -1778,18 +1777,18 @@ HTTPHdr::url_print(char *buff, int length, int *offset, int *skip, bool normaliz int zret = 0; UrlPrintHack hack(this); if (hack.is_valid()) { - zret = m_url_cached.print(buff, length, offset, skip, normalized); + zret = m_url_cached.print(buff, length, offset, skip, normalization_flags); } return zret; } int -HTTPHdr::url_printed_length() +HTTPHdr::url_printed_length(unsigned normalization_flags) { int zret = -1; UrlPrintHack hack(this); if (hack.is_valid()) { - zret = m_url_cached.length_get(); + zret = m_url_cached.length_get(normalization_flags); } return zret; } diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h index 0ae1077b585..3da31a07d85 100644 --- a/proxy/hdrs/HTTP.h +++ b/proxy/hdrs/HTTP.h @@ -546,9 +546,8 @@ class HTTPHdr : public MIMEHdr and invoking @c URL::string_get if the host is in a header field and not explicitly in the URL. */ - char *url_string_get(Arena *arena = nullptr, ///< Arena to use, or @c malloc if NULL. - int *length = nullptr, ///< Store string length here. - bool normalized = false ///< Scheme and host normalized to lower case letters. + char *url_string_get(Arena *arena = nullptr, ///< Arena to use, or @c malloc if NULL. + int *length = nullptr ///< Store string length here. ); /** Get a string with the effective URL in it. This is automatically allocated if needed in the request heap. @@ -562,17 +561,17 @@ class HTTPHdr : public MIMEHdr Output is not null terminated. @return 0 on failure, non-zero on success. */ - int url_print(char *buff, ///< Output buffer - int length, ///< Length of @a buffer - int *offset, ///< [in,out] ??? - int *skip, ///< [in,out] ??? - bool normalized = false ///< host/scheme normalized to lower case + int url_print(char *buff, ///< Output buffer + int length, ///< Length of @a buffer + int *offset, ///< [in,out] ??? + int *skip, ///< [in,out] ??? + unsigned normalization_flags = URLNormalize::NONE ///< host/scheme normalized to lower case ); /** Return the length of the URL that url_print() will create. @return -1 on failure, non-negative on success. */ - int url_printed_length(); + int url_printed_length(unsigned normalizaion_flags = URLNormalize::NONE); /** Get the URL path. This is a reference, not allocated. diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc index 4f7b4e8e0f4..70fc177b626 100644 --- a/proxy/hdrs/URL.cc +++ b/proxy/hdrs/URL.cc @@ -612,40 +612,41 @@ url_clear_string_ref(URLImpl *url) } char * -url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, bool normalized) +url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, unsigned normalization_flags) { if (!url) { return nullptr; } - if (url->m_ptr_printed_string && url->m_clean) { + if (url->m_ptr_printed_string && url->m_clean && (normalization_flags == url->m_normalization_flags)) { if (length) { *length = url->m_len_printed_string; } return const_cast(url->m_ptr_printed_string); } else { // either not clean or never printed - int len = url_length_get(url); + int len = url_length_get(url, normalization_flags); char *buf; int index = 0; int offset = 0; /* stuff alloc'd here gets gc'd on HdrHeap::destroy() */ buf = heap->allocate_str(len + 1); - url_print(url, buf, len, &index, &offset, normalized); + url_print(url, buf, len, &index, &offset, normalization_flags); buf[len] = '\0'; if (length) { *length = len; } - url->m_clean = true; // reset since we have url_print()'ed again - url->m_len_printed_string = len; - url->m_ptr_printed_string = buf; + url->m_clean = true; // reset since we have url_print()'ed again + url->m_len_printed_string = len; + url->m_ptr_printed_string = buf; + url->m_normalization_flags = normalization_flags; return buf; } } char * -url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap, bool normalized) +url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap) { int len = url_length_get(url); char *buf; @@ -655,7 +656,7 @@ url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap, bool norm buf = arena ? arena->str_alloc(len) : static_cast(ats_malloc(len + 1)); - url_print(url, buf, len, &index, &offset, normalized); + url_print(url, buf, len, &index, &offset); buf[len] = '\0'; /* see string_get_ref() */ @@ -801,15 +802,19 @@ url_type_get(URLImpl *url) -------------------------------------------------------------------------*/ int -url_length_get(URLImpl *url) +url_length_get(URLImpl *url, unsigned normalization_flags) { int length = 0; if (url->m_ptr_scheme) { - if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) { - length += url->m_len_scheme + 1; // +1 for ":" - } else { - length += url->m_len_scheme + 3; // +3 for "://" + length += url->m_len_scheme + 3; // +3 for "://" + + } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) { + if (URL_TYPE_HTTP == url->m_url_type) { + length += URL_LEN_HTTP + 3; + + } else if (URL_TYPE_HTTPS == url->m_url_type) { + length += URL_LEN_HTTPS + 3; } } @@ -1582,15 +1587,30 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char ***********************************************************************/ int -url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout, bool normalize) +url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout, + unsigned normalization_flags) { #define TRY(x) \ if (!x) \ return 0 + bool scheme_added = false; if (url->m_ptr_scheme) { - TRY((normalize ? mime_mem_print_lc : mime_mem_print)(url->m_ptr_scheme, url->m_len_scheme, buf_start, buf_length, - buf_index_inout, buf_chars_to_skip_inout)); + TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)( + url->m_ptr_scheme, url->m_len_scheme, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); + scheme_added = true; + + } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) { + if (URL_TYPE_HTTP == url->m_url_type) { + TRY(mime_mem_print(URL_SCHEME_HTTP, URL_LEN_HTTP, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); + scheme_added = true; + + } else if (URL_TYPE_HTTPS == url->m_url_type) { + TRY(mime_mem_print(URL_SCHEME_HTTPS, URL_LEN_HTTPS, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); + scheme_added = true; + } + } + if (scheme_added) { TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); } @@ -1612,8 +1632,8 @@ url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, i if (bracket_p) { TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); } - TRY((normalize ? mime_mem_print_lc : mime_mem_print)(url->m_ptr_host, url->m_len_host, buf_start, buf_length, buf_index_inout, - buf_chars_to_skip_inout)); + TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)( + url->m_ptr_host, url->m_len_host, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); if (bracket_p) { TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); } diff --git a/proxy/hdrs/URL.h b/proxy/hdrs/URL.h index ab8b36caf4a..d31107ee45a 100644 --- a/proxy/hdrs/URL.h +++ b/proxy/hdrs/URL.h @@ -76,7 +76,8 @@ struct URLImpl : public HdrHeapObjImpl { uint32_t m_clean : 1; /// Whether the URI had an absolutely empty path, not even an initial '/'. uint32_t m_path_is_empty : 1; - // 8 bytes + 2 bits, will result in padding + uint32_t m_normalization_flags : 2; // Only valid if both m_clean and m_ptr_printed_sting are non-zero. + // 8 bytes + 4 bits, will result in padding // Marshaling Functions int marshal(MarshalXlate *str_xlate, int num_xlate); @@ -166,13 +167,22 @@ URLImpl *url_copy(URLImpl *s_url, HdrHeap *s_heap, HdrHeap *d_heap, bool inherit void url_copy_onto(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs = true); void url_copy_onto_as_server_url(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs = true); -int url_print(URLImpl *u, char *buf, int bufsize, int *bufindex, int *dumpoffset, bool normalized = false); +// Normalization flag masks. +namespace URLNormalize +{ +unsigned const NONE = 0; +unsigned const IMPLIED_SCHEME = 1; // If scheme missing, add scheme implied by URL type. +unsigned const LC_SCHEME_HOST = 2; // Force scheme and host to lower case if necessary. +}; // namespace URLNormalize + +int url_print(URLImpl *u, char *buf, int bufsize, int *bufindex, int *dumpoffset, + unsigned normalization_flags = URLNormalize::NONE); void url_describe(HdrHeapObjImpl *raw, bool recurse); -int url_length_get(URLImpl *url); -char *url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap, bool normalized = false); +int url_length_get(URLImpl *url, unsigned normalization_flags = URLNormalize::NONE); +char *url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap); void url_clear_string_ref(URLImpl *url); -char *url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, bool normalized = false); +char *url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, unsigned normalization_flags = URLNormalize::NONE); void url_called_set(URLImpl *url); char *url_string_get_buf(URLImpl *url, char *dstbuf, int dstbuf_size, int *length); @@ -245,14 +255,14 @@ class URL : public HdrHeapSDKHandle // Note that URL::destroy() is inherited from HdrHeapSDKHandle. void nuke_proxy_stuff(); - int print(char *buf, int bufsize, int *bufindex, int *dumpoffset, bool normalized = false) const; + int print(char *buf, int bufsize, int *bufindex, int *dumpoffset, unsigned normalization_flags = URLNormalize::NONE) const; - int length_get() const; + int length_get(unsigned normalization_flags = URLNormalize::NONE) const; void clear_string_ref(); - char *string_get(Arena *arena, int *length = nullptr, bool normalized = false) const; - char *string_get_ref(int *length = nullptr, bool normalized = false) const; + char *string_get(Arena *arena, int *length = nullptr) const; + char *string_get_ref(int *length = nullptr, unsigned normalization_flags = URLNormalize::NONE) const; char *string_get_buf(char *dstbuf, int dsbuf_size, int *length = nullptr) const; void hash_get(CryptoHash *hash, cache_generation_t generation = -1) const; void host_hash_get(CryptoHash *hash) const; @@ -415,37 +425,37 @@ URL::nuke_proxy_stuff() -------------------------------------------------------------------------*/ inline int -URL::print(char *buf, int bufsize, int *bufindex, int *dumpoffset, bool normalized) const +URL::print(char *buf, int bufsize, int *bufindex, int *dumpoffset, unsigned normalization_flags) const { ink_assert(valid()); - return url_print(m_url_impl, buf, bufsize, bufindex, dumpoffset, normalized); + return url_print(m_url_impl, buf, bufsize, bufindex, dumpoffset, normalization_flags); } /*------------------------------------------------------------------------- -------------------------------------------------------------------------*/ inline int -URL::length_get() const +URL::length_get(unsigned normalization_flags) const { ink_assert(valid()); - return url_length_get(m_url_impl); + return url_length_get(m_url_impl, normalization_flags); } /*------------------------------------------------------------------------- -------------------------------------------------------------------------*/ inline char * -URL::string_get(Arena *arena_or_null_for_malloc, int *length, bool normalized) const +URL::string_get(Arena *arena_or_null_for_malloc, int *length) const { ink_assert(valid()); - return url_string_get(m_url_impl, arena_or_null_for_malloc, length, m_heap, normalized); + return url_string_get(m_url_impl, arena_or_null_for_malloc, length, m_heap); } inline char * -URL::string_get_ref(int *length, bool normalized) const +URL::string_get_ref(int *length, unsigned normalization_flags) const { ink_assert(valid()); - return url_string_get_ref(m_heap, m_url_impl, length, normalized); + return url_string_get_ref(m_heap, m_url_impl, length, normalization_flags); } inline void diff --git a/proxy/http/remap/NextHopConsistentHash.cc b/proxy/http/remap/NextHopConsistentHash.cc index c14c971bf9a..d1d2b0ff740 100644 --- a/proxy/http/remap/NextHopConsistentHash.cc +++ b/proxy/http/remap/NextHopConsistentHash.cc @@ -134,7 +134,7 @@ NextHopConsistentHash::getHashKey(uint64_t sm_id, HttpRequestData *hrdata, ATSHa // calculate a hash using the selected config. switch (hash_key) { case NH_URL_HASH_KEY: - url_string_ref = url->string_get_ref(&len, true); + url_string_ref = url->string_get_ref(&len, URLNormalize::LC_SCHEME_HOST); if (url_string_ref && len > 0) { h->update(url_string_ref, len); NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] url hash string: %s", sm_id, url_string_ref); diff --git a/src/traffic_server/InkAPI.cc b/src/traffic_server/InkAPI.cc index e4a4f230c55..a02373dc1d1 100644 --- a/src/traffic_server/InkAPI.cc +++ b/src/traffic_server/InkAPI.cc @@ -5157,7 +5157,7 @@ TSHttpHdrEffectiveUrlBufGet(TSMBuffer hdr_buf, TSMLoc hdr_loc, char *buf, int64_ return TS_ERROR; } - int url_length = buf_handle->url_printed_length(); + int url_length = buf_handle->url_printed_length(URLNormalize::LC_SCHEME_HOST | URLNormalize::IMPLIED_SCHEME); sdk_assert(url_length >= 0); @@ -5170,7 +5170,7 @@ TSHttpHdrEffectiveUrlBufGet(TSMBuffer hdr_buf, TSMLoc hdr_loc, char *buf, int64_ int index = 0; int offset = 0; - buf_handle->url_print(buf, size, &index, &offset, true); + buf_handle->url_print(buf, size, &index, &offset, URLNormalize::LC_SCHEME_HOST | URLNormalize::IMPLIED_SCHEME); } return TS_SUCCESS; diff --git a/tests/gold_tests/pluginTest/tsapi/log.gold b/tests/gold_tests/pluginTest/tsapi/log.gold index b12f447867f..c157e8168bc 100644 --- a/tests/gold_tests/pluginTest/tsapi/log.gold +++ b/tests/gold_tests/pluginTest/tsapi/log.gold @@ -40,14 +40,14 @@ TSUrlPortGet(): SERVER_PORT Global: event=TS_EVENT_HTTP_SEND_REQUEST_HDR TSHttpTxnEffectiveUrlStringGet(): http://127.0.0.1:SERVER_PORT/ Request To Server: -TSHttpHdrEffectiveUrlBufGet(): 127.0.0.1:SERVER_PORT/ +TSHttpHdrEffectiveUrlBufGet(): http://127.0.0.1:SERVER_PORT/ TSUrlSchemeGet(): http TSUrlRawSchemeGet(): failed to get raw URL scheme TSUrlPortGet(): 80 Transaction: event=TS_EVENT_HTTP_SEND_REQUEST_HDR TSHttpTxnEffectiveUrlStringGet(): http://127.0.0.1:SERVER_PORT/ Request To Server: -TSHttpHdrEffectiveUrlBufGet(): 127.0.0.1:SERVER_PORT/ +TSHttpHdrEffectiveUrlBufGet(): http://127.0.0.1:SERVER_PORT/ TSUrlSchemeGet(): http TSUrlRawSchemeGet(): failed to get raw URL scheme TSUrlPortGet(): 80 @@ -81,14 +81,14 @@ TSUrlPortGet(): SERVER_PORT Global: event=TS_EVENT_HTTP_SEND_REQUEST_HDR TSHttpTxnEffectiveUrlStringGet(): http://127.0.0.1:SERVER_PORT/xYz Request To Server: -TSHttpHdrEffectiveUrlBufGet(): 127.0.0.1:SERVER_PORT/xYz +TSHttpHdrEffectiveUrlBufGet(): http://127.0.0.1:SERVER_PORT/xYz TSUrlSchemeGet(): http TSUrlRawSchemeGet(): failed to get raw URL scheme TSUrlPortGet(): 80 Transaction: event=TS_EVENT_HTTP_SEND_REQUEST_HDR TSHttpTxnEffectiveUrlStringGet(): http://127.0.0.1:SERVER_PORT/xYz Request To Server: -TSHttpHdrEffectiveUrlBufGet(): 127.0.0.1:SERVER_PORT/xYz +TSHttpHdrEffectiveUrlBufGet(): http://127.0.0.1:SERVER_PORT/xYz TSUrlSchemeGet(): http TSUrlRawSchemeGet(): failed to get raw URL scheme TSUrlPortGet(): 80 @@ -122,14 +122,14 @@ TSUrlPortGet(): SERVER_PORT Global: event=TS_EVENT_HTTP_SEND_REQUEST_HDR TSHttpTxnEffectiveUrlStringGet(): http://127.0.0.1:SERVER_PORT/ Request To Server: -TSHttpHdrEffectiveUrlBufGet(): 127.0.0.1:SERVER_PORT/ +TSHttpHdrEffectiveUrlBufGet(): http://127.0.0.1:SERVER_PORT/ TSUrlSchemeGet(): http TSUrlRawSchemeGet(): failed to get raw URL scheme TSUrlPortGet(): 80 Transaction: event=TS_EVENT_HTTP_SEND_REQUEST_HDR TSHttpTxnEffectiveUrlStringGet(): http://127.0.0.1:SERVER_PORT/ Request To Server: -TSHttpHdrEffectiveUrlBufGet(): 127.0.0.1:SERVER_PORT/ +TSHttpHdrEffectiveUrlBufGet(): http://127.0.0.1:SERVER_PORT/ TSUrlSchemeGet(): http TSUrlRawSchemeGet(): failed to get raw URL scheme TSUrlPortGet(): 80