diff --git a/plugins/experimental/traffic_dump/traffic_dump.cc b/plugins/experimental/traffic_dump/traffic_dump.cc index 464dd41d1b2..f915ce54679 100644 --- a/plugins/experimental/traffic_dump/traffic_dump.cc +++ b/plugins/experimental/traffic_dump/traffic_dump.cc @@ -198,9 +198,9 @@ escape_json(const char *buf, int64_t size) } inline std::string -json_entry(std::string const &name, const char *buf, int64_t size) +json_entry(std::string const &name, const char *value, int64_t size) { - return "\"" + escape_json(name) + "\":\"" + escape_json(buf, size) + "\""; + return "\"" + escape_json(name) + "\":\"" + escape_json(value, size) + "\""; } /// json_entry_array(): Formats to array-style entry i.e. ["field","value"] @@ -210,6 +210,21 @@ json_entry_array(const char *name, int name_len, const char *value, int value_le return "[\"" + escape_json(name, name_len) + "\", \"" + escape_json(value, value_len) + "\"]"; } +/** Remove the scheme prefix from the url. + * + * @return The view without the scheme prefix. + */ +std::string_view +remove_scheme_prefix(std::string_view url) +{ + const auto scheme_separator = url.find("://"); + if (scheme_separator == std::string::npos) { + return url; + } + url.remove_prefix(scheme_separator + 3); + return url; +} + /// Helper functions to collect txn information from TSMBuffer std::string collect_headers(TSMBuffer &buffer, TSMLoc &hdr_loc, int64_t body_bytes) @@ -219,26 +234,42 @@ collect_headers(TSMBuffer &buffer, TSMLoc &hdr_loc, int64_t body_bytes) const char *cp = nullptr; TSMLoc url_loc = nullptr; - // Log scheme+method or status+reason based on header type + // Log scheme+method+request-target or status+reason based on header type if (TSHttpHdrTypeGet(buffer, hdr_loc) == TS_HTTP_TYPE_REQUEST) { // 1. "version" int version = TSHttpHdrVersionGet(buffer, hdr_loc); result += R"("version":")" + std::to_string(TS_HTTP_MAJOR(version)) + "." + std::to_string(TS_HTTP_MINOR(version)) + '"'; - // 2. "scheme": TSAssert(TS_SUCCESS == TSHttpHdrUrlGet(buffer, hdr_loc, &url_loc)); + // 2. "scheme": cp = TSUrlSchemeGet(buffer, url_loc, &len); - TSDebug(PLUGIN_NAME, "collect_headers(): found scheme %d ", len); + TSDebug(PLUGIN_NAME, "collect_headers(): found scheme %.*s ", len, cp); result += "," + json_entry("scheme", cp, len); // 3. "method":(string) cp = TSHttpHdrMethodGet(buffer, hdr_loc, &len); + TSDebug(PLUGIN_NAME, "collect_headers(): found method %.*s ", len, cp); result += "," + json_entry("method", cp, len); // 4. "url" - cp = TSUrlStringGet(buffer, url_loc, &len); - TSDebug(PLUGIN_NAME, "collect_headers(): found url %.*s", len, cp); - result += "," + json_entry("url", cp, len); + cp = TSUrlHostGet(buffer, url_loc, &len); + std::string_view host{cp, static_cast(len)}; + + char *url = TSUrlStringGet(buffer, url_loc, &len); + std::string_view url_string{url, static_cast(len)}; + + if (host.empty()) { + // TSUrlStringGet will add the scheme to the URL, even if the request + // target doesn't contain it. However, we cannot just always remove the + // scheme because the original request target may include it. We assume + // here that a URL with a scheme but not a host is artificial and thus + // we remove it. + url_string = remove_scheme_prefix(url_string); + } + + TSDebug(PLUGIN_NAME, "collect_headers(): found host target %.*s", static_cast(url_string.size()), url_string.data()); + result += "," + json_entry("url", url_string.data(), url_string.size()); + TSfree(url); TSHandleMLocRelease(buffer, hdr_loc, url_loc); } else { // 1. "status":(string) @@ -338,28 +369,38 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata) return TS_SUCCESS; } + std::string txn_info; switch (event) { - case TS_EVENT_HTTP_TXN_CLOSE: { + case TS_EVENT_HTTP_TXN_START: { // Get UUID char uuid[TS_CRUUID_STRING_LEN + 1]; TSAssert(TS_SUCCESS == TSClientRequestUuidGet(txnp, uuid)); // Generate per transaction json records - std::string txn_info; if (!ssnData->first) { txn_info += ","; } ssnData->first = false; // "uuid":(string) - txn_info += "{" + json_entry("uuid", uuid, strlen(uuid)); - - // "connect-time":(number) + txn_info += "{"; + // "connection-time":(number) TSHRTime start_time; TSHttpTxnMilestoneGet(txnp, TS_MILESTONE_UA_BEGIN, &start_time); - txn_info += ",\"start-time\":" + std::to_string(start_time); + txn_info += "\"connection-time\":" + std::to_string(start_time); - // client/proxy-request/response headers + // The uuid is a header field for each message in the transaction. Use the + // "all" node to apply to each message. + std::string_view name = "uuid"; + txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name.data(), name.size(), uuid, strlen(uuid)); + txn_info += "]}}"; + ssnData->write_to_disk(txn_info); + break; + } + + case TS_EVENT_HTTP_READ_REQUEST_HDR: { + // We must grab the client request information before remap happens because + // the remap process modifies the request buffer. TSMBuffer buffer; TSMLoc hdr_loc; if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &buffer, &hdr_loc)) { @@ -368,6 +409,14 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata) TSHandleMLocRelease(buffer, TS_NULL_MLOC, hdr_loc); buffer = nullptr; } + ssnData->write_to_disk(txn_info); + break; + } + + case TS_EVENT_HTTP_TXN_CLOSE: { + // proxy-request/response headers + TSMBuffer buffer; + TSMLoc hdr_loc; if (TS_SUCCESS == TSHttpTxnServerReqGet(txnp, &buffer, &hdr_loc)) { TSDebug(PLUGIN_NAME, "Found proxy request"); txn_info += R"(,"proxy-request":)" + collect_headers(buffer, hdr_loc, TSHttpTxnServerReqBodyBytesGet(txnp)); @@ -514,6 +563,8 @@ global_ssn_handler(TSCont contp, TSEvent event, void *edata) } TSMutexUnlock(ssnData->disk_io_mutex); + TSHttpSsnHookAdd(ssnp, TS_HTTP_TXN_START_HOOK, ssnData->txn_cont); + TSHttpSsnHookAdd(ssnp, TS_HTTP_READ_REQUEST_HDR_HOOK, ssnData->txn_cont); TSHttpSsnHookAdd(ssnp, TS_HTTP_TXN_CLOSE_HOOK, ssnData->txn_cont); break; } diff --git a/tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold b/tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold new file mode 100644 index 00000000000..40e21cb7abf --- /dev/null +++ b/tests/gold_tests/pluginTest/traffic_dump/gold/explicit_target.gold @@ -0,0 +1,8 @@ +`` +> GET http://localhost:``/candy HTTP/1.1 +> Host: www.example.com`` +> User-Agent: curl/`` +> Accept: */* +`` +< Server: ATS/`` +`` diff --git a/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py b/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py index 1b4be842380..b0d4d6d7b1a 100644 --- a/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py +++ b/tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py @@ -29,9 +29,18 @@ # Configure the origin server. server = Test.MakeOriginServer("server") -request_header = {"headers": "GET / HTTP/1.1\r\nHost: www.example.com\r\nContent-Length: 0\r\n\r\n", +request_header = {"headers": "GET / HTTP/1.1\r\n" + "Host: www.example.com\r\nContent-Length: 0\r\n\r\n", "timestamp": "1469733493.993", "body": ""} -response_header = {"headers": "HTTP/1.1 200 OK\r\nConnection: close\r\nContent-Length: 0\r\n\r\n", +response_header = {"headers": "HTTP/1.1 200 OK" + "\r\nConnection: close\r\nContent-Length: 0\r\n\r\n", + "timestamp": "1469733493.993", "body": ""} +server.addResponse("sessionfile.log", request_header, response_header) +request_header = {"headers": "GET /one HTTP/1.1\r\n" + "Host: www.example.com\r\nContent-Length: 0\r\n\r\n", + "timestamp": "1469733493.993", "body": ""} +response_header = {"headers": "HTTP/1.1 200 OK" + "\r\nConnection: close\r\nContent-Length: 0\r\n\r\n", "timestamp": "1469733493.993", "body": ""} server.addResponse("sessionfile.log", request_header, response_header) @@ -66,6 +75,12 @@ ts.Disk.File(replay_file_session_1, exists=True) replay_file_session_2 = os.path.join(replay_dir, "127", "0000000000000001") ts.Disk.File(replay_file_session_2, exists=True) +replay_file_session_3 = os.path.join(replay_dir, "127", "0000000000000002") +ts.Disk.File(replay_file_session_3, exists=True) + +# +# Test 1: Verify the correct behavior of two transactions across two sessions. +# # Execute the first transaction. tr = Test.AddTestRun("First transaction") @@ -81,7 +96,7 @@ # Execute the second transaction. tr = Test.AddTestRun("Second transaction") -tr.Processes.Default.Command = 'curl http://127.0.0.1:{0} -H\'Host: www.example.com\' --verbose'.format( +tr.Processes.Default.Command = 'curl http://127.0.0.1:{0}/one -H\'Host: www.example.com\' --verbose'.format( ts.Variables.port) tr.Processes.Default.ReturnCode = 0 tr.Processes.Default.Streams.stderr = "gold/200.gold" @@ -103,10 +118,38 @@ # Verify the properties of the replay file for the second transaction. tr = Test.AddTestRun("Verify the json content of the second session") tr.Setup.CopyAs(verify_replay, Test.RunDirectory) -tr.Processes.Default.Command = "python3 {0} {1} {2}".format( +tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '/one'".format( verify_replay, os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'), replay_file_session_2) tr.Processes.Default.ReturnCode = 0 tr.StillRunningAfter = server tr.StillRunningAfter = ts + +# +# Test 2: Verify the correct behavior of an explicit path in the request line. +# + +# Verify that an explicit path in the request line is recorded. +tr = Test.AddTestRun("Make a request with an explicit target.") +request_target = "http://localhost:{0}/candy".format(ts.Variables.port) +tr.Processes.Default.Command = ( + 'curl --request-target "{0}" ' + 'http://127.0.0.1:{1} -H\'Host: www.example.com\' --verbose'.format( + request_target, ts.Variables.port)) +tr.Processes.Default.ReturnCode = 0 +tr.Processes.Default.Streams.stderr = "gold/explicit_target.gold" +tr.StillRunningAfter = server +tr.StillRunningAfter = ts + +tr = Test.AddTestRun("Verify the replay file has the explicit target.") +tr.Setup.CopyAs(verify_replay, Test.RunDirectory) + +tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '{3}'".format( + verify_replay, + os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'), + replay_file_session_3, + request_target) +tr.Processes.Default.ReturnCode = 0 +tr.StillRunningAfter = server +tr.StillRunningAfter = ts diff --git a/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py b/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py index 5b5738a8e7f..5a33061acfe 100644 --- a/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py +++ b/tests/gold_tests/pluginTest/traffic_dump/verify_replay.py @@ -57,6 +57,19 @@ def verify_there_was_a_transaction(replay_json): return True +def verify_request_target(replay_json, request_target): + """ + Verify that the 'url' element of the first transaction contains the request target. + """ + try: + url = replay_json['sessions'][0]['transactions'][0]['client-request']['url'] + except KeyError: + print("The replay file did not have a first transaction with a url element.") + return False + + return url == request_target + + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("schema_file", @@ -65,6 +78,8 @@ def parse_args(): parser.add_argument("replay_file", type=argparse.FileType('r'), help="The replay file to validate.") + parser.add_argument("--request-target", + help="The request target ('url' element) to expect in the replay file.") return parser.parse_args() @@ -89,6 +104,9 @@ def main(): if not verify_there_was_a_transaction(replay_json): return 1 + if args.request_target and not verify_request_target(replay_json, args.request_target): + return 1 + return 0