From 1335d5b09e62c0f33e1765ac9ecc959a3a30126c Mon Sep 17 00:00:00 2001 From: bneradt Date: Tue, 11 Feb 2020 22:43:46 +0000 Subject: [PATCH] Traffic Dump: fix client request target parsing Before this change, traffic_dump included the remapped URL in the request target put into the client-request field, even if the client's request had no such URL in the target. This fixes things so that the URL field is accurate with respect to the original request. There's a couple other minor tweaks: * Format UUID as an "all" headers field. * start-time -> connection-time, per Proxy Verifier's expectations. --- .../experimental/traffic_dump/traffic_dump.cc | 81 +++++++++++++++---- .../traffic_dump/gold/explicit_target.gold | 8 ++ .../traffic_dump/traffic_dump.test.py | 51 +++++++++++- .../pluginTest/traffic_dump/verify_replay.py | 18 +++++ 4 files changed, 139 insertions(+), 19 deletions(-) create mode 100644 tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold diff --git a/plugins/experimental/traffic_dump/traffic_dump.cc b/plugins/experimental/traffic_dump/traffic_dump.cc index 464dd41d1b2..f915ce54679 100644 --- a/plugins/experimental/traffic_dump/traffic_dump.cc +++ b/plugins/experimental/traffic_dump/traffic_dump.cc @@ -198,9 +198,9 @@ escape_json(const char *buf, int64_t size) } inline std::string -json_entry(std::string const &name, const char *buf, int64_t size) +json_entry(std::string const &name, const char *value, int64_t size) { - return "\"" + escape_json(name) + "\":\"" + escape_json(buf, size) + "\""; + return "\"" + escape_json(name) + "\":\"" + escape_json(value, size) + "\""; } /// json_entry_array(): Formats to array-style entry i.e. ["field","value"] @@ -210,6 +210,21 @@ json_entry_array(const char *name, int name_len, const char *value, int value_le return "[\"" + escape_json(name, name_len) + "\", \"" + escape_json(value, value_len) + "\"]"; } +/** Remove the scheme prefix from the url. + * + * @return The view without the scheme prefix. + */ +std::string_view +remove_scheme_prefix(std::string_view url) +{ + const auto scheme_separator = url.find("://"); + if (scheme_separator == std::string::npos) { + return url; + } + url.remove_prefix(scheme_separator + 3); + return url; +} + /// Helper functions to collect txn information from TSMBuffer std::string collect_headers(TSMBuffer &buffer, TSMLoc &hdr_loc, int64_t body_bytes) @@ -219,26 +234,42 @@ collect_headers(TSMBuffer &buffer, TSMLoc &hdr_loc, int64_t body_bytes) const char *cp = nullptr; TSMLoc url_loc = nullptr; - // Log scheme+method or status+reason based on header type + // Log scheme+method+request-target or status+reason based on header type if (TSHttpHdrTypeGet(buffer, hdr_loc) == TS_HTTP_TYPE_REQUEST) { // 1. "version" int version = TSHttpHdrVersionGet(buffer, hdr_loc); result += R"("version":")" + std::to_string(TS_HTTP_MAJOR(version)) + "." + std::to_string(TS_HTTP_MINOR(version)) + '"'; - // 2. "scheme": TSAssert(TS_SUCCESS == TSHttpHdrUrlGet(buffer, hdr_loc, &url_loc)); + // 2. "scheme": cp = TSUrlSchemeGet(buffer, url_loc, &len); - TSDebug(PLUGIN_NAME, "collect_headers(): found scheme %d ", len); + TSDebug(PLUGIN_NAME, "collect_headers(): found scheme %.*s ", len, cp); result += "," + json_entry("scheme", cp, len); // 3. "method":(string) cp = TSHttpHdrMethodGet(buffer, hdr_loc, &len); + TSDebug(PLUGIN_NAME, "collect_headers(): found method %.*s ", len, cp); result += "," + json_entry("method", cp, len); // 4. "url" - cp = TSUrlStringGet(buffer, url_loc, &len); - TSDebug(PLUGIN_NAME, "collect_headers(): found url %.*s", len, cp); - result += "," + json_entry("url", cp, len); + cp = TSUrlHostGet(buffer, url_loc, &len); + std::string_view host{cp, static_cast(len)}; + + char *url = TSUrlStringGet(buffer, url_loc, &len); + std::string_view url_string{url, static_cast(len)}; + + if (host.empty()) { + // TSUrlStringGet will add the scheme to the URL, even if the request + // target doesn't contain it. However, we cannot just always remove the + // scheme because the original request target may include it. We assume + // here that a URL with a scheme but not a host is artificial and thus + // we remove it. + url_string = remove_scheme_prefix(url_string); + } + + TSDebug(PLUGIN_NAME, "collect_headers(): found host target %.*s", static_cast(url_string.size()), url_string.data()); + result += "," + json_entry("url", url_string.data(), url_string.size()); + TSfree(url); TSHandleMLocRelease(buffer, hdr_loc, url_loc); } else { // 1. "status":(string) @@ -338,28 +369,38 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata) return TS_SUCCESS; } + std::string txn_info; switch (event) { - case TS_EVENT_HTTP_TXN_CLOSE: { + case TS_EVENT_HTTP_TXN_START: { // Get UUID char uuid[TS_CRUUID_STRING_LEN + 1]; TSAssert(TS_SUCCESS == TSClientRequestUuidGet(txnp, uuid)); // Generate per transaction json records - std::string txn_info; if (!ssnData->first) { txn_info += ","; } ssnData->first = false; // "uuid":(string) - txn_info += "{" + json_entry("uuid", uuid, strlen(uuid)); - - // "connect-time":(number) + txn_info += "{"; + // "connection-time":(number) TSHRTime start_time; TSHttpTxnMilestoneGet(txnp, TS_MILESTONE_UA_BEGIN, &start_time); - txn_info += ",\"start-time\":" + std::to_string(start_time); + txn_info += "\"connection-time\":" + std::to_string(start_time); - // client/proxy-request/response headers + // The uuid is a header field for each message in the transaction. Use the + // "all" node to apply to each message. + std::string_view name = "uuid"; + txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name.data(), name.size(), uuid, strlen(uuid)); + txn_info += "]}}"; + ssnData->write_to_disk(txn_info); + break; + } + + case TS_EVENT_HTTP_READ_REQUEST_HDR: { + // We must grab the client request information before remap happens because + // the remap process modifies the request buffer. TSMBuffer buffer; TSMLoc hdr_loc; if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &buffer, &hdr_loc)) { @@ -368,6 +409,14 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata) TSHandleMLocRelease(buffer, TS_NULL_MLOC, hdr_loc); buffer = nullptr; } + ssnData->write_to_disk(txn_info); + break; + } + + case TS_EVENT_HTTP_TXN_CLOSE: { + // proxy-request/response headers + TSMBuffer buffer; + TSMLoc hdr_loc; if (TS_SUCCESS == TSHttpTxnServerReqGet(txnp, &buffer, &hdr_loc)) { TSDebug(PLUGIN_NAME, "Found proxy request"); txn_info += R"(,"proxy-request":)" + collect_headers(buffer, hdr_loc, TSHttpTxnServerReqBodyBytesGet(txnp)); @@ -514,6 +563,8 @@ global_ssn_handler(TSCont contp, TSEvent event, void *edata) } TSMutexUnlock(ssnData->disk_io_mutex); + TSHttpSsnHookAdd(ssnp, TS_HTTP_TXN_START_HOOK, ssnData->txn_cont); + TSHttpSsnHookAdd(ssnp, TS_HTTP_READ_REQUEST_HDR_HOOK, ssnData->txn_cont); TSHttpSsnHookAdd(ssnp, TS_HTTP_TXN_CLOSE_HOOK, ssnData->txn_cont); break; } diff --git a/tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold b/tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold new file mode 100644 index 00000000000..40e21cb7abf --- /dev/null +++ b/tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold @@ -0,0 +1,8 @@ +`` +> GET http://localhost:``/candy HTTP/1.1 +> Host: www.example.com`` +> User-Agent: curl/`` +> Accept: */* +`` +< Server: ATS/`` +`` diff --git a/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py b/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py index 1b4be842380..b0d4d6d7b1a 100644 --- a/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py +++ b/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py @@ -29,9 +29,18 @@ # Configure the origin server. server = Test.MakeOriginServer("server") -request_header = {"headers": "GET / HTTP/1.1\r\nHost: www.example.com\r\nContent-Length: 0\r\n\r\n", +request_header = {"headers": "GET / HTTP/1.1\r\n" + "Host: www.example.com\r\nContent-Length: 0\r\n\r\n", "timestamp": "1469733493.993", "body": ""} -response_header = {"headers": "HTTP/1.1 200 OK\r\nConnection: close\r\nContent-Length: 0\r\n\r\n", +response_header = {"headers": "HTTP/1.1 200 OK" + "\r\nConnection: close\r\nContent-Length: 0\r\n\r\n", + "timestamp": "1469733493.993", "body": ""} +server.addResponse("sessionfile.log", request_header, response_header) +request_header = {"headers": "GET /one HTTP/1.1\r\n" + "Host: www.example.com\r\nContent-Length: 0\r\n\r\n", + "timestamp": "1469733493.993", "body": ""} +response_header = {"headers": "HTTP/1.1 200 OK" + "\r\nConnection: close\r\nContent-Length: 0\r\n\r\n", "timestamp": "1469733493.993", "body": ""} server.addResponse("sessionfile.log", request_header, response_header) @@ -66,6 +75,12 @@ ts.Disk.File(replay_file_session_1, exists=True) replay_file_session_2 = os.path.join(replay_dir, "127", "0000000000000001") ts.Disk.File(replay_file_session_2, exists=True) +replay_file_session_3 = os.path.join(replay_dir, "127", "0000000000000002") +ts.Disk.File(replay_file_session_3, exists=True) + +# +# Test 1: Verify the correct behavior of two transactions across two sessions. +# # Execute the first transaction. tr = Test.AddTestRun("First transaction") @@ -81,7 +96,7 @@ # Execute the second transaction. tr = Test.AddTestRun("Second transaction") -tr.Processes.Default.Command = 'curl http://127.0.0.1:{0} -H\'Host: www.example.com\' --verbose'.format( +tr.Processes.Default.Command = 'curl http://127.0.0.1:{0}/one -H\'Host: www.example.com\' --verbose'.format( ts.Variables.port) tr.Processes.Default.ReturnCode = 0 tr.Processes.Default.Streams.stderr = "gold/200.gold" @@ -103,10 +118,38 @@ # Verify the properties of the replay file for the second transaction. tr = Test.AddTestRun("Verify the json content of the second session") tr.Setup.CopyAs(verify_replay, Test.RunDirectory) -tr.Processes.Default.Command = "python3 {0} {1} {2}".format( +tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '/one'".format( verify_replay, os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'), replay_file_session_2) tr.Processes.Default.ReturnCode = 0 tr.StillRunningAfter = server tr.StillRunningAfter = ts + +# +# Test 2: Verify the correct behavior of an explicit path in the request line. +# + +# Verify that an explicit path in the request line is recorded. +tr = Test.AddTestRun("Make a request with an explicit target.") +request_target = "http://localhost:{0}/candy".format(ts.Variables.port) +tr.Processes.Default.Command = ( + 'curl --request-target "{0}" ' + 'http://127.0.0.1:{1} -H\'Host: www.example.com\' --verbose'.format( + request_target, ts.Variables.port)) +tr.Processes.Default.ReturnCode = 0 +tr.Processes.Default.Streams.stderr = "gold/explicit_target.gold" +tr.StillRunningAfter = server +tr.StillRunningAfter = ts + +tr = Test.AddTestRun("Verify the replay file has the explicit target.") +tr.Setup.CopyAs(verify_replay, Test.RunDirectory) + +tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '{3}'".format( + verify_replay, + os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'), + replay_file_session_3, + request_target) +tr.Processes.Default.ReturnCode = 0 +tr.StillRunningAfter = server +tr.StillRunningAfter = ts diff --git a/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py b/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py index 5b5738a8e7f..5a33061acfe 100644 --- a/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py +++ b/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py @@ -57,6 +57,19 @@ def verify_there_was_a_transaction(replay_json): return True +def verify_request_target(replay_json, request_target): + """ + Verify that the 'url' element of the first transaction contains the request target. + """ + try: + url = replay_json['sessions'][0]['transactions'][0]['client-request']['url'] + except KeyError: + print("The replay file did not have a first transaction with a url element.") + return False + + return url == request_target + + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("schema_file", @@ -65,6 +78,8 @@ def parse_args(): parser.add_argument("replay_file", type=argparse.FileType('r'), help="The replay file to validate.") + parser.add_argument("--request-target", + help="The request target ('url' element) to expect in the replay file.") return parser.parse_args() @@ -89,6 +104,9 @@ def main(): if not verify_there_was_a_transaction(replay_json): return 1 + if args.request_target and not verify_request_target(replay_json, args.request_target): + return 1 + return 0