From f455113f0de89d93d2d64c6cb6a85cc321c05778 Mon Sep 17 00:00:00 2001 From: Jeff Elsloo Date: Tue, 5 Apr 2022 18:18:08 -0600 Subject: [PATCH 1/3] Adds a simple tool to generate new escape tables --- tools/Makefile.am | 8 +++ tools/escape_mapper/README | 14 +++++ tools/escape_mapper/escape_mapper.c | 80 +++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 tools/escape_mapper/README create mode 100644 tools/escape_mapper/escape_mapper.c diff --git a/tools/Makefile.am b/tools/Makefile.am index c806d5ba793..44a4191c275 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -54,6 +54,14 @@ http_load_http_load_SOURCES = \ endif +if BUILD_TEST_TOOLS +bin_PROGRAMS += escape_mapper/escape_mapper +else +noinst_PROGRAMS += escape_mapper/escape_mapper +endif + +escape_mapper_escape_mapper_SOURCES = escape_mapper/escape_mapper.c + all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) @sed "s/ -fPIE//" tsxs > tsxs.new @mv -f tsxs.new tsxs diff --git a/tools/escape_mapper/README b/tools/escape_mapper/README new file mode 100644 index 00000000000..f86371a36be --- /dev/null +++ b/tools/escape_mapper/README @@ -0,0 +1,14 @@ +The `escape_mapper` tool is a simple utility that allows one to view +the existing table used for URL escaping, found in LogUtils, specifically +the escapify_url_common function. This function allows one to specify +an alternate table that contains more characters to escape than the +RFC-compliant default. + +WARNING: the default encoded characters list is currently hardcoded. 
+ +This tool operates in two basic modes: + 1) print out the default mappings + 2) print out the new mapping based on the provided argument + +Only one argument is supported to generate a new mapping table, for example: + ./escape_mapper '&,+/=' diff --git a/tools/escape_mapper/escape_mapper.c b/tools/escape_mapper/escape_mapper.c new file mode 100644 index 00000000000..e43d4573bf6 --- /dev/null +++ b/tools/escape_mapper/escape_mapper.c @@ -0,0 +1,80 @@ +/** @file + + A brief file description + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +#include <stdio.h> +#include <string.h> + +void +add_mapping(unsigned char *table, char c) +{ + int i = c / 8; + int x = 7 - c % 8; + int r = 1 << x; + + table[i] = table[i] | r; +} + +int +main(int argc, char *argv[]) +{ + // only support a single arg that contains all the chars we wish to escapify + if (argc > 1 && argc != 2) { + printf("Provide a single argument with a list of characters to add to the default encoding table\n"); + return (1); + } + + // default characters supported by the codes_to_escape table found in LogUtils.cc + unsigned char to_escape[16] = { + ' ', '"', '#', '%', '<', '>', '[', ']', '\\', '^', '`', '{', '|', '}', '~', 0x7F, + }; + + unsigned char escape_codes[32]; + memset(&escape_codes[0], 0, sizeof(escape_codes)); + + // indexes 0-3 are marked as "control" + for (int i = 0; i < 4; i++) { + escape_codes[i] = 0xFF; + } + + for (unsigned long i = 0; i < sizeof(to_escape) / sizeof(to_escape[0]); i++) { + add_mapping(&escape_codes[0], to_escape[i]); + } + + // add the chars specified in argv + if (argc > 1) { + for (unsigned long i = 0; i < strlen(argv[1]); i++) { + printf("Adding %c to escape mapping table\n", argv[1][i]); + add_mapping(&escape_codes[0], argv[1][i]); + } + + printf("\n"); + } + + printf("%s Escape Mapping Table:\n", (argc > 1) ? "New" : "Default"); + + for (unsigned long i = 0; i < sizeof(escape_codes) / sizeof(escape_codes[0]); i++) { + printf(" %2lu: %#04x\n", i, escape_codes[i]); + } + + return (0); +} From dd6b6ac6ba8b1f94c693dd0b0d4980bd98f858d2 Mon Sep 17 00:00:00 2001 From: Jeff Elsloo Date: Tue, 5 Apr 2022 18:45:30 -0600 Subject: [PATCH 2/3] Removed trailing whitespace from the README. 
--- tools/escape_mapper/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/escape_mapper/README b/tools/escape_mapper/README index f86371a36be..860c9a823ae 100644 --- a/tools/escape_mapper/README +++ b/tools/escape_mapper/README @@ -1,7 +1,7 @@ The `escape_mapper` tool is a simple utility that allows one to view the existing table used for URL escaping, found in LogUtils, specifically the escapify_url_common function. This function allows one to specify -an alternate table that contains more characters to escape than the +an alternate table that contains more characters to escape than the RFC-compliant default. WARNING: the default encoded characters list is currently hardcoded. From 8e0e8939bad10c41beaff46443e2267e68743f9e Mon Sep 17 00:00:00 2001 From: Brian Neradt Date: Tue, 12 Apr 2022 16:06:18 +0000 Subject: [PATCH 3/3] Some review suggestions: 1. Add the binary to gitignore 2. Add a NOTE requesting changes to LogUtils.cc codes_to_escape to update the tool. 3. Convert escape_mapper to C++. --- .gitignore | 1 + proxy/logging/LogUtils.cc | 2 ++ tools/Makefile.am | 2 +- .../{escape_mapper.c => escape_mapper.cc} | 35 +++++++++++-------- 4 files changed, 24 insertions(+), 16 deletions(-) rename tools/escape_mapper/{escape_mapper.c => escape_mapper.cc} (59%) diff --git a/.gitignore b/.gitignore index 3f9ff12506f..cee67733ed4 100644 --- a/.gitignore +++ b/.gitignore @@ -199,6 +199,7 @@ CTAGS tools/http_load/http_load tools/jtest/jtest tools/trafficserver.pc +tools/escape_mapper/escape_mapper BUILDS DEBUG diff --git a/proxy/logging/LogUtils.cc b/proxy/logging/LogUtils.cc index 25fe6728a7a..ee64f32e942 100644 --- a/proxy/logging/LogUtils.cc +++ b/proxy/logging/LogUtils.cc @@ -296,6 +296,8 @@ escapify_url_common(Arena *arena, char *url, size_t len_in, int *len_out, char * // historically this is what the traffic_server has done. // Note that we leave codes beyond 127 unmodified. 
// + // NOTE: any updates to this table should result in an update to: + // tools/escape_mapper/escape_mapper.cc. static const unsigned char codes_to_escape[32] = { 0xFF, 0xFF, 0xFF, 0xFF, // control diff --git a/tools/Makefile.am b/tools/Makefile.am index 44a4191c275..135bed0a213 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -60,7 +60,7 @@ else noinst_PROGRAMS += escape_mapper/escape_mapper endif -escape_mapper_escape_mapper_SOURCES = escape_mapper/escape_mapper.c +escape_mapper_escape_mapper_SOURCES = escape_mapper/escape_mapper.cc all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) @sed "s/ -fPIE//" tsxs > tsxs.new diff --git a/tools/escape_mapper/escape_mapper.c b/tools/escape_mapper/escape_mapper.cc similarity index 59% rename from tools/escape_mapper/escape_mapper.c rename to tools/escape_mapper/escape_mapper.cc index e43d4573bf6..fc8ca2505c3 100644 --- a/tools/escape_mapper/escape_mapper.c +++ b/tools/escape_mapper/escape_mapper.cc @@ -21,8 +21,9 @@ limitations under the License. */ -#include <stdio.h> -#include <string.h> +#include <iomanip> +#include <iostream> +#include <string_view> void add_mapping(unsigned char *table, char c) @@ -39,7 +40,7 @@ main(int argc, char *argv[]) { // only support a single arg that contains all the chars we wish to escapify if (argc > 1 && argc != 2) { - printf("Provide a single argument with a list of characters to add to the default encoding table\n"); + std::cerr << "Provide a single argument with a list of characters to add to the default encoding table." 
<< std::endl; return (1); } @@ -48,33 +49,37 @@ main(int argc, char *argv[]) ' ', '"', '#', '%', '<', '>', '[', ']', '\\', '^', '`', '{', '|', '}', '~', 0x7F, }; - unsigned char escape_codes[32]; - memset(&escape_codes[0], 0, sizeof(escape_codes)); + unsigned char escape_codes[32] = {0}; // indexes 0-3 are marked as "control" for (int i = 0; i < 4; i++) { escape_codes[i] = 0xFF; } - for (unsigned long i = 0; i < sizeof(to_escape) / sizeof(to_escape[0]); i++) { - add_mapping(&escape_codes[0], to_escape[i]); + // add_mapping performs a logical or on the entries, so the above 0xFF values + // will persist. + for (auto char_to_escape : to_escape) { + add_mapping(&escape_codes[0], char_to_escape); } // add the chars specified in argv if (argc > 1) { - for (unsigned long i = 0; i < strlen(argv[1]); i++) { - printf("Adding %c to escape mapping table\n", argv[1][i]); - add_mapping(&escape_codes[0], argv[1][i]); + std::string_view escape_characters{argv[1]}; + for (auto const char_to_escape : escape_characters) { + std::cout << "Adding '" << char_to_escape << "' to escape mapping table." << std::endl; + add_mapping(&escape_codes[0], char_to_escape); } - - printf("\n"); + std::cout << std::endl; } - printf("%s Escape Mapping Table:\n", (argc > 1) ? "New" : "Default"); + std::string_view qualification{((argc > 1) ? "New" : "Default")}; + std::cout << qualification << " Escape Mapping Table:" << std::endl; for (unsigned long i = 0; i < sizeof(escape_codes) / sizeof(escape_codes[0]); i++) { - printf(" %2lu: %#04x\n", i, escape_codes[i]); + std::cout << std::dec << std::setfill(' ') << std::setw(4) << i << ": 0x"; + auto const escape_code = static_cast<unsigned int>(escape_codes[i]); + std::cout << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << escape_code << std::endl; } - return (0); + return 0; }