From 83a0581dee66f5b432a8617fa45a7b0e456d56e4 Mon Sep 17 00:00:00 2001 From: Masakazu Kitajo Date: Fri, 5 Jul 2024 18:07:09 -0600 Subject: [PATCH 1/4] Make TSUrlHttpParamsGet include params segment --- src/proxy/hdrs/URL.cc | 73 ++++++++--------------------- src/proxy/http/HttpSM.cc | 3 +- src/traffic_cache_tool/CacheScan.cc | 8 ++-- 3 files changed, 24 insertions(+), 60 deletions(-) diff --git a/src/proxy/hdrs/URL.cc b/src/proxy/hdrs/URL.cc index 191e644876f..79de7790ab1 100644 --- a/src/proxy/hdrs/URL.cc +++ b/src/proxy/hdrs/URL.cc @@ -551,8 +551,19 @@ URLImpl::set_path(HdrHeap *heap, const char *value, int length, bool copy_string void URLImpl::set_params(HdrHeap *heap, const char *value, int length, bool copy_string) { - url_called_set(this); - mime_str_u16_set(heap, value, length, &(this->m_ptr_params), &(this->m_len_params), copy_string); + int total_length = this->m_len_path + 1 + length; + char *path_and_params = static_cast(alloca(total_length)); + int path_len = this->m_len_path; + + if (auto p = strchr(this->m_ptr_path, ';'); p != nullptr) { + auto params_len = path_len - (p - this->m_ptr_path); + path_len -= params_len; + } + memcpy(path_and_params, this->m_ptr_path, path_len); + memcpy(path_and_params + this->m_len_path, ";", 1); + memcpy(path_and_params + this->m_len_path + 1, value, length); + + this->set_path(heap, path_and_params, total_length, true); } /*------------------------------------------------------------------------- @@ -887,10 +898,6 @@ url_length_get(URLImpl *url, unsigned normalization_flags) length += 1; // +1 for "/" } - if (url->m_ptr_params && url->m_len_params > 0) { - length += url->m_len_params + 1; // +1 for ";" - } - if (url->m_ptr_query && url->m_len_query > 0) { length += url->m_len_query + 1; // +1 for "?" } @@ -962,12 +969,6 @@ url_to_string(URLImpl *url, Arena *arena, int *length) memcpy(&str[idx], url->m_ptr_path, url->m_len_path); idx += url->m_len_path; - if (url->m_ptr_params && url->m_len_params > 0) { - str[idx++] = ';'; - memcpy(&str[idx], url->m_ptr_params, url->m_len_params); - idx += url->m_len_params; - } - if (url->m_ptr_query && url->m_len_query > 0) { str[idx++] = '?'; memcpy(&str[idx], url->m_ptr_query, url->m_len_query); @@ -1433,8 +1434,6 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, const char *cur; const char *path_start = nullptr; const char *path_end = nullptr; - const char *params_start = nullptr; - const char *params_end = nullptr; const char *query_start = nullptr; const char *query_end = nullptr; const char *fragment_start = nullptr; @@ -1456,13 +1455,9 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, if (*cur == '/') { path_start = cur; } - mask = ';' & '?' & '#'; + mask = '?' & '#'; parse_path2: if ((*cur & mask) == mask) { - if (*cur == ';') { - path_end = cur; - goto parse_params1; - } if (*cur == '?') { path_end = cur; goto parse_query1; @@ -1472,26 +1467,11 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, goto parse_fragment1; } } else { - ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#')); + ink_assert((*cur != '?') && (*cur != '#')); } GETNEXT(done); goto parse_path2; -parse_params1: - params_start = cur + 1; - GETNEXT(done); -parse_params2: - if (*cur == '?') { - params_end = cur; - goto parse_query1; - } - if (*cur == '#') { - params_end = cur; - goto parse_fragment1; - } - GETNEXT(done); - goto parse_params2; - parse_query1: query_start = cur + 1; GETNEXT(done); @@ -1552,12 +1532,6 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, // nothing_after_host check. url->m_path_is_empty = true; } - if (params_start) { - if (!params_end) { - params_end = cur; - } - url->set_params(heap, params_start, params_end - params_start, copy_strings); - } if (query_start) { // There was a query string marked by '?'. if (!query_end) { @@ -1715,11 +1689,6 @@ url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, i TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); } - if (url->m_ptr_params && url->m_len_params > 0) { - TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); - TRY(mime_mem_print(url->m_ptr_params, url->m_len_params, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); - } - if (url->m_ptr_query && url->m_len_query > 0) { TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); TRY(mime_mem_print(url->m_ptr_query, url->m_len_query, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); @@ -1753,8 +1722,6 @@ url_describe(HdrHeapObjImpl *raw, bool /* recurse ATS_UNUSED */) obj->m_len_port, obj->m_port); Dbg(dbg_ctl_http, "\tPATH: \"%.*s\", PATH_LEN: %d,", obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path); - Dbg(dbg_ctl_http, "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,", obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"), - obj->m_len_params); Dbg(dbg_ctl_http, "\tQUERY: \"%.*s\", QUERY_LEN: %d,", obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"), obj->m_len_query); Dbg(dbg_ctl_http, "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]", obj->m_len_fragment, @@ -1855,8 +1822,8 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h ends[7] = strs[7] + 1; ends[8] = strs[8] + url->m_len_path; - strs[9] = ";"; - strs[10] = url->m_ptr_params; + strs[9] = ""; + strs[10] = nullptr; strs[11] = "?"; // Special case for the query paramters, allowing us to ignore them if requested @@ -1868,8 +1835,8 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h ends[12] = nullptr; } - ends[9] = strs[9] + 1; - ends[10] = strs[10] + url->m_len_params; + ends[9] = strs[9] + 0; + ends[10] = strs[10] + 0; ends[11] = strs[11] + 1; p = buffer; @@ -1927,7 +1894,7 @@ url_CryptoHash_get(const URLImpl *url, CryptoHash *hash, bool ignore_query, cach { URLHashContext ctx; if ((url_hash_method != 0) && (url->m_url_type == URL_TYPE_HTTP) && - ((url->m_len_user + url->m_len_password + url->m_len_params + (ignore_query ? 0 : url->m_len_query)) == 0) && + ((url->m_len_user + url->m_len_password + (ignore_query ? 0 : url->m_len_query)) == 0) && (3 + 1 + 1 + 1 + 1 + 1 + 2 + url->m_len_scheme + url->m_len_host + url->m_len_path < BUFSIZE) && (memchr(url->m_ptr_host, '%', url->m_len_host) == nullptr) && (memchr(url->m_ptr_path, '%', url->m_len_path) == nullptr)) { url_CryptoHash_get_fast(url, ctx, hash, generation); diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc index 1c2f606a880..97affff9e75 100644 --- a/src/proxy/http/HttpSM.cc +++ b/src/proxy/http/HttpSM.cc @@ -709,8 +709,7 @@ HttpSM::state_read_client_request_header(int event, void *data) call_transact_and_set_next_state(HttpTransact::TooEarly); return 0; } else if (!SSLConfigParams::server_allow_early_data_params && - (t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_params > 0 || - t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_query > 0)) { + t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_query > 0) { SMDbg(dbg_ctl_http, "client request was from early data but HAS parameters"); call_transact_and_set_next_state(HttpTransact::TooEarly); return 0; diff --git a/src/traffic_cache_tool/CacheScan.cc b/src/traffic_cache_tool/CacheScan.cc index bac9ea3f758..26e92894eff 100644 --- a/src/traffic_cache_tool/CacheScan.cc +++ b/src/traffic_cache_tool/CacheScan.cc @@ -388,19 +388,17 @@ CacheScan::get_alternates(const char *buf, int length, bool search) std::string str; if (search) { - swoc::bwprint(str, "{}://{}:{}/{};{}?{}", std::string_view(url->m_ptr_scheme, url->m_len_scheme), + swoc::bwprint(str, "{}://{}:{}/{}?{}", std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host), std::string_view(url->m_ptr_port, url->m_len_port), - std::string_view(url->m_ptr_path, url->m_len_path), std::string_view(url->m_ptr_params, url->m_len_params), - std::string_view(url->m_ptr_query, url->m_len_query)); + std::string_view(url->m_ptr_path, url->m_len_path), std::string_view(url->m_ptr_query, url->m_len_query)); if (u_matcher->match(str.data())) { str = this->stripe->hashText + " " + str; std::cout << "match found " << str << std::endl; } } else { - swoc::bwprint(str, "stripe: {} : {}://{}:{}/{};{}?{}", std::string_view(this->stripe->hashText), + swoc::bwprint(str, "stripe: {} : {}://{}:{}/{}?{}", std::string_view(this->stripe->hashText), std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host), std::string_view(url->m_ptr_port, url->m_len_port), std::string_view(url->m_ptr_path, url->m_len_path), - std::string_view(url->m_ptr_params, url->m_len_params), std::string_view(url->m_ptr_query, url->m_len_query)); std::cout << str << std::endl; } From fa80627722dffe1301d737dd6c16111c8d7d98eb Mon Sep 17 00:00:00 2001 From: Masakazu Kitajo Date: Mon, 8 Jul 2024 14:22:16 -0600 Subject: [PATCH 2/4] Add some unit tests --- src/proxy/hdrs/unit_tests/test_URL.cc | 82 +++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/src/proxy/hdrs/unit_tests/test_URL.cc b/src/proxy/hdrs/unit_tests/test_URL.cc index 5c7cdf4f408..596be86a2b3 100644 --- a/src/proxy/hdrs/unit_tests/test_URL.cc +++ b/src/proxy/hdrs/unit_tests/test_URL.cc @@ -697,3 +697,85 @@ TEST_CASE("UrlHashGet", "[url][hash_get]") } } } + +struct get_path_test_case { + const std::string description; + const std::string uri; + const std::string path; +}; + +// clang-format off +std::vector get_path_test_cases = { + { + "Semicolon in paths 1", + "http://foo.test/abc/xyz;p1=1,p2=2", + "abc/xyz;p1=1,p2=2", + }, + { + "Semicolon in paths 2", + "http://foo.test/abc;p1=1,p2=2/xyz", + "abc;p1=1,p2=2/xyz", + }, + { + "Semicolon in paths 3", + "http://foo.test/abc/xyz;p1=1,p2=2?q1=1", + "abc/xyz;p1=1,p2=2", + }, + { + "Semicolon in paths 4", + "http://foo.test/abc;p1=1,p2=2/xyz?q1=1", + "abc;p1=1,p2=2/xyz", + }, +}; + +/** Return the hash related to a URI. + * + * @param[in] uri The URI to hash. + * @return The hash of the URI. + */ +TEST_CASE("UrlPathGet", "[url][path_get]") +{ + for (auto const &test_case : get_path_test_cases) { + std::string description = test_case.description + ": " + test_case.uri + " -> " + test_case.path; + SECTION(description) { + URL url; + HdrHeap *heap = new_HdrHeap(); + url.create(heap); + url.parse(test_case.uri); + const char *path; + int path_len; + path = url.path_get(&path_len); + CHECK(std::string_view(path, path_len) == test_case.path); + heap->destroy(); + } + } +} + + +/** + * Tests for deprecated params_get/set + */ +TEST_CASE("UrlParamsGet", "[url][params_get]") +{ + // Expected behavior + // - ParamsGet always return empty string + // - ParamsSet appends the value to path + // - PathGet returns params appended by ParamsSet + + const char *value; + int value_len; + + URL url; + HdrHeap *heap = new_HdrHeap(); + url.create(heap); + url.parse("https://foo.test/path"); + value = url.path_get(&value_len); + CHECK(std::string_view(value, value_len) == "path"); + url.params_set("param=1", 7); + value = url.params_get(&value_len); + CHECK(value == nullptr); + CHECK(value_len == 0); + value = url.path_get(&value_len); + CHECK(std::string_view(value, value_len) == "path;param=1"); + heap->destroy(); +} From e0129cdc6058f64ce43bc5134c01d6be6f0b9ce7 Mon Sep 17 00:00:00 2001 From: Masakazu Kitajo Date: Mon, 8 Jul 2024 15:05:19 -0600 Subject: [PATCH 3/4] Update docs --- include/ts/ts.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/ts/ts.h b/include/ts/ts.h index 99e8c41ce71..bc829f10238 100644 --- a/include/ts/ts.h +++ b/include/ts/ts.h @@ -667,6 +667,9 @@ TSReturnCode TSUrlFtpTypeSet(TSMBuffer bufp, TSMLoc offset, int type); argument. Note: the returned string is not guaranteed to be null-terminated. + This function is deprecated and returns empty string. + Plugins that need "params" can call TSUrlPathGet to get a whole path string to parse it. + @param bufp marshal buffer containing the URL. @param offset location of the URL. @param length of the returned string. @@ -683,6 +686,9 @@ const char *TSUrlHttpParamsGet(TSMBuffer bufp, TSMLoc offset, int *length); copies the string to within bufp, so you can modify or delete value after calling TSUrlHttpParamsSet(). + This function is deprecated. The value passed will be internally appended to the path portion. + Thus, TSUrlHttpParamsGet will not return the value set by this function. + @param bufp marshal buffer containing the URL. @param offset location of the URL. @param value HTTP params string to set in the URL. From 1dcf4d1f8818448b5f9764337eb8d87b0399cf76 Mon Sep 17 00:00:00 2001 From: Masakazu Kitajo Date: Mon, 8 Jul 2024 18:07:53 -0600 Subject: [PATCH 4/4] Fix --- src/proxy/hdrs/URL.cc | 13 ++++++++----- src/proxy/hdrs/unit_tests/test_URL.cc | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/proxy/hdrs/URL.cc b/src/proxy/hdrs/URL.cc index 79de7790ab1..e56ab796c2f 100644 --- a/src/proxy/hdrs/URL.cc +++ b/src/proxy/hdrs/URL.cc @@ -551,17 +551,20 @@ URLImpl::set_path(HdrHeap *heap, const char *value, int length, bool copy_string void URLImpl::set_params(HdrHeap *heap, const char *value, int length, bool copy_string) { - int total_length = this->m_len_path + 1 + length; - char *path_and_params = static_cast(alloca(total_length)); - int path_len = this->m_len_path; + int path_len = this->m_len_path; + // Truncate the current param segment if it exists if (auto p = strchr(this->m_ptr_path, ';'); p != nullptr) { auto params_len = path_len - (p - this->m_ptr_path); path_len -= params_len; } + + int total_length = path_len + 1 + length; + char *path_and_params = static_cast(alloca(total_length)); + memcpy(path_and_params, this->m_ptr_path, path_len); - memcpy(path_and_params + this->m_len_path, ";", 1); - memcpy(path_and_params + this->m_len_path + 1, value, length); + memcpy(path_and_params + path_len, ";", 1); + memcpy(path_and_params + path_len + 1, value, length); this->set_path(heap, path_and_params, total_length, true); } diff --git a/src/proxy/hdrs/unit_tests/test_URL.cc b/src/proxy/hdrs/unit_tests/test_URL.cc index 596be86a2b3..9c5b3c64131 100644 --- a/src/proxy/hdrs/unit_tests/test_URL.cc +++ b/src/proxy/hdrs/unit_tests/test_URL.cc @@ -768,9 +768,9 @@ TEST_CASE("UrlParamsGet", "[url][params_get]") URL url; HdrHeap *heap = new_HdrHeap(); url.create(heap); - url.parse("https://foo.test/path"); + url.parse("https://foo.test/path;p=1"); value = url.path_get(&value_len); - CHECK(std::string_view(value, value_len) == "path"); + CHECK(std::string_view(value, value_len) == "path;p=1"); url.params_set("param=1", 7); value = url.params_get(&value_len); CHECK(value == nullptr);