diff --git a/.gitignore b/.gitignore index 3f9ff12506f..cee67733ed4 100644 --- a/.gitignore +++ b/.gitignore @@ -199,6 +199,7 @@ CTAGS tools/http_load/http_load tools/jtest/jtest tools/trafficserver.pc +tools/escape_mapper/escape_mapper BUILDS DEBUG diff --git a/proxy/logging/LogUtils.cc b/proxy/logging/LogUtils.cc index 25fe6728a7a..ee64f32e942 100644 --- a/proxy/logging/LogUtils.cc +++ b/proxy/logging/LogUtils.cc @@ -296,6 +296,8 @@ escapify_url_common(Arena *arena, char *url, size_t len_in, int *len_out, char * // historically this is what the traffic_server has done. // Note that we leave codes beyond 127 unmodified. // + // NOTE: any updates to this table should result in an update to: + // tools/escape_mapper/escape_mapper.cc. static const unsigned char codes_to_escape[32] = { 0xFF, 0xFF, 0xFF, 0xFF, // control diff --git a/tools/Makefile.am b/tools/Makefile.am index c806d5ba793..135bed0a213 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -54,6 +54,14 @@ http_load_http_load_SOURCES = \ endif +if BUILD_TEST_TOOLS +bin_PROGRAMS += escape_mapper/escape_mapper +else +noinst_PROGRAMS += escape_mapper/escape_mapper +endif + +escape_mapper_escape_mapper_SOURCES = escape_mapper/escape_mapper.cc + all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(DATA) @sed "s/ -fPIE//" tsxs > tsxs.new @mv -f tsxs.new tsxs diff --git a/tools/escape_mapper/README b/tools/escape_mapper/README new file mode 100644 index 00000000000..860c9a823ae --- /dev/null +++ b/tools/escape_mapper/README @@ -0,0 +1,14 @@ +The `escape_mapper` tool is a simple utility that allows one to view +the existing table used for URL escaping, found in LogUtils, specifically +the escapify_url_common function. This function allows one to specify +an alternate table that contains more characters to escape than the +RFC-compliant default. + +WARNING: the default encoded characters list is currently hardcoded. 
/** @file

  Prints the URL escape bitmap that mirrors the codes_to_escape table used by
  escapify_url_common() in proxy/logging/LogUtils.cc.

  The tool operates in two modes:
    1) with no argument, print the default mapping table;
    2) with exactly one argument, print the default table extended with every
       character contained in that argument, for example:
         ./escape_mapper '&,+/='

  WARNING: the default list of escaped characters is hardcoded here and must be
  kept in sync with LogUtils.cc by hand.

  @section license License

  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
 */

#include <iomanip>
#include <iostream>
#include <string_view>

/** Mark one character as "to be escaped" in a bitmap table.

  The table holds one bit per byte value: byte value @c v lives in
  @c table[v / 8], with the most significant bit of each entry standing for
  the lowest value in that group of eight.

  @param table Bitmap to update; must have room for 32 entries so that every
               possible byte value has a slot.
  @param c     Character whose bit is set. Existing bits are preserved.
 */
void
add_mapping(unsigned char *table, char c)
{
  // Plain char may be signed. Convert to its unsigned byte value first so that
  // bytes >= 0x80 cannot produce a negative index or a shift count above 7.
  const unsigned int code = static_cast<unsigned char>(c);
  const unsigned int slot = code / 8;
  const unsigned int bit  = 7 - code % 8;

  table[slot] = static_cast<unsigned char>(table[slot] | (1u << bit));
}

/** Build and print the escape mapping table.

  @param argc Argument count; at most one user argument is accepted.
  @param argv argv[1], when present, lists the extra characters to escape.
  @return 0 on success, 1 when more than one argument is supplied.
 */
int
main(int argc, char *argv[])
{
  // Only a single argument, holding all the characters to escapify, is supported.
  if (argc > 2) {
    std::cerr << "Provide a single argument with a list of characters to add to the default encoding table." << std::endl;
    return 1;
  }

  // Default characters covered by the codes_to_escape table found in LogUtils.cc.
  const unsigned char to_escape[16] = {
    ' ', '"', '#', '%', '<', '>', '[', ']', '\\', '^', '`', '{', '|', '}', '~', 0x7F,
  };

  // Entries 0-3 are the "control" range and are fully set; the remaining
  // entries start out zeroed.
  unsigned char escape_codes[32] = {0xFF, 0xFF, 0xFF, 0xFF};

  // add_mapping ORs bits in, so the 0xFF control entries persist.
  for (const auto char_to_escape : to_escape) {
    add_mapping(escape_codes, static_cast<char>(char_to_escape));
  }

  const bool has_extra_chars = argc > 1;

  // Add the characters specified on the command line.
  if (has_extra_chars) {
    const std::string_view escape_characters{argv[1]};
    for (const auto char_to_escape : escape_characters) {
      std::cout << "Adding '" << char_to_escape << "' to escape mapping table." << std::endl;
      add_mapping(escape_codes, char_to_escape);
    }
    std::cout << std::endl;
  }

  const std::string_view qualification{has_extra_chars ? "New" : "Default"};
  std::cout << qualification << " Escape Mapping Table:" << std::endl;

  for (unsigned long i = 0; i < sizeof(escape_codes) / sizeof(escape_codes[0]); i++) {
    std::cout << std::dec << std::setfill(' ') << std::setw(4) << i << ": 0x";
    // Widen to an integer so the stream prints a number rather than a raw byte.
    const auto escape_code = static_cast<unsigned int>(escape_codes[i]);
    std::cout << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << escape_code << std::endl;
  }

  return 0;
}