From 791c968b17b081fe56621940a4876fe57b1d5a57 Mon Sep 17 00:00:00 2001 From: Serris Lew Date: Fri, 4 Nov 2022 16:57:58 -0700 Subject: [PATCH 1/5] Adding back set connect fail for generic I/O error --- proxy/http/HttpTransact.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index ac9beb41799..5bc5ea5b365 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -3752,6 +3752,11 @@ HttpTransact::handle_response_from_server(State *s) case CONNECTION_CLOSED: case BAD_INCOMING_RESPONSE: + // Set to generic I/O error if not already set specifically. + if (!s->current.server->had_connect_fail()) { + s->set_connect_fail(EIO); + } + if (is_server_negative_cached(s)) { max_connect_retries = s->txn_conf->connect_attempts_max_retries_dead_server - 1; } else { From bb69a5bf30744d396c7bd2269c7f1e44c29a9b51 Mon Sep 17 00:00:00 2001 From: Serris Lew Date: Wed, 9 Nov 2022 17:04:06 -0800 Subject: [PATCH 2/5] Set generic error earlier & added autest --- proxy/http/HttpSM.cc | 6 +- proxy/http/HttpTransact.cc | 5 - tests/gold_tests/dns/dns_host_down.test.py | 93 +++++++++++++++++++ tests/gold_tests/dns/hosts_file | 1 + .../dns/replay/server_down.replay.yaml | 54 +++++++++++ 5 files changed, 151 insertions(+), 8 deletions(-) create mode 100644 tests/gold_tests/dns/dns_host_down.test.py create mode 100644 tests/gold_tests/dns/hosts_file create mode 100644 tests/gold_tests/dns/replay/server_down.replay.yaml diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc index 69ee672a674..d71115f4952 100644 --- a/proxy/http/HttpSM.cc +++ b/proxy/http/HttpSM.cc @@ -1919,9 +1919,6 @@ HttpSM::state_http_server_open(int event, void *data) server_entry->write_vio = server_txn->do_io_write(this, nbytes, server_txn->get_remote_reader()); - // Pre-emptively set a server connect failure that will be cleared once a WRITE_READY is received from origin or - // bytes are received back - t_state.set_connect_fail(EIO); } else { // in the case of an intercept plugin don't to the connect timeout change SMDebug("http", "not setting handler for TCP handshake"); handle_http_server_open(); @@ -7626,6 +7623,9 @@ HttpSM::set_next_state() } case HttpTransact::SM_ACTION_ORIGIN_SERVER_OPEN: { + // Pre-emptively set a server connect failure that will be cleared once a WRITE_READY is received from origin or + // bytes are received back + t_state.set_connect_fail(EIO); HTTP_SM_SET_DEFAULT_HANDLER(&HttpSM::state_http_server_open); // We need to close the previous attempt diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index 5bc5ea5b365..ac9beb41799 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -3752,11 +3752,6 @@ HttpTransact::handle_response_from_server(State *s) case CONNECTION_CLOSED: case BAD_INCOMING_RESPONSE: - // Set to generic I/O error if not already set specifically. - if (!s->current.server->had_connect_fail()) { - s->set_connect_fail(EIO); - } - if (is_server_negative_cached(s)) { max_connect_retries = s->txn_conf->connect_attempts_max_retries_dead_server - 1; } else { diff --git a/tests/gold_tests/dns/dns_host_down.test.py b/tests/gold_tests/dns/dns_host_down.test.py new file mode 100644 index 00000000000..4c9f6065db6 --- /dev/null +++ b/tests/gold_tests/dns/dns_host_down.test.py @@ -0,0 +1,93 @@ +''' +Verify ATS handles down origin servers with domain cached correctly. +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ports import get_port +import os + +Test.Summary = ''' +Verify ATS handles down origin servers with cached domain correctly. +''' + +class DownCachedOriginServerTest: + replay_file = "replay/server_down.replay.yaml" + + def __init__(self, host_down = False): + """Initialize the Test processes for the test runs.""" + self._dns_port = None + + def _configure_trafficserver(self): + """Configure Traffic Server.""" + self._ts = Test.MakeATSProcess("ts", enable_cache=False) + + self._ts.Disk.remap_config.AddLine( + f"map / http://resolve.this.com:{self._server.Variables.http_port}/" + ) + + self._ts.Disk.records_config.update({ + 'proxy.config.diags.debug.enabled': 0, + 'proxy.config.diags.debug.tags': 'hostdb|dns|http|socket', + 'proxy.config.http.connect_attempts_max_retries': 0, + 'proxy.config.http.connect_attempts_rr_retries': 0, + 'proxy.config.hostdb.fail.timeout': 10, + 'proxy.config.dns.resolv_conf': 'NULL', + 'proxy.config.hostdb.ttl_mode': 1, + 'proxy.config.hostdb.timeout': 10, + 'proxy.config.hostdb.lookup_timeout': 2, + 'proxy.config.http.transaction_no_activity_timeout_in': 2, + 'proxy.config.hostdb.host_file.interval' : 1, + 'proxy.config.hostdb.host_file.path': os.path.join(Test.TestDirectory, "hosts_file"), + }) + + # Even when the origin server is down, SM will return a hit-fresh domain from HostDB. + # After request has failed, SM should mark the IP as down + def _test_host_mark_down(self): + tr = Test.AddTestRun() + self._server = tr.AddVerifierServerProcess("server", DownCachedOriginServerTest.replay_file) + self._configure_trafficserver() + + tr.Processes.Default.StartBefore(self._server) + tr.Processes.Default.StartBefore(self._ts) + + tr.DelayStart = 1 + tr.AddVerifierClientProcess( + "client-1", + DownCachedOriginServerTest.replay_file, + http_ports=[self._ts.Variables.port], + other_args='--keys 1') + + tr.NotRunningAfter = self._server + + # After host has been marked down from previous test, HostDB should not return + # the host as available and DNS lookup should fail. + def _test_host_unreachable(self): + tr = Test.AddTestRun() + + tr.NotRunningBefore = self._server + + tr.DelayStart = 1 + tr.AddVerifierClientProcess( + "client-2", + DownCachedOriginServerTest.replay_file, + http_ports=[self._ts.Variables.port], + other_args='--keys 2') + + def run(self): + self._test_host_mark_down() + self._test_host_unreachable() + +DownCachedOriginServerTest().run() diff --git a/tests/gold_tests/dns/hosts_file b/tests/gold_tests/dns/hosts_file new file mode 100644 index 00000000000..a0f5d2c70e3 --- /dev/null +++ b/tests/gold_tests/dns/hosts_file @@ -0,0 +1 @@ +0.0.0.1 resolve.this.com diff --git a/tests/gold_tests/dns/replay/server_down.replay.yaml b/tests/gold_tests/dns/replay/server_down.replay.yaml new file mode 100644 index 00000000000..34dc7aca9e2 --- /dev/null +++ b/tests/gold_tests/dns/replay/server_down.replay.yaml @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +meta: + version: "1.0" + +sessions: +- transactions: + - client-request: + delay: 2s + method: "GET" + version: "1.1" + url: /dns/mark/down + headers: + fields: + - [ Host, example.com ] + - [ X-Request, request ] + - [ uuid, 1 ] + + server-response: + status: 200 + + proxy-response: + status: 502 + + - client-request: + delay: 2s + method: "GET" + version: "1.1" + url: /dns/unreachable + headers: + fields: + - [ Host, example.com ] + - [ X-Request, request ] + - [ uuid, 2 ] + + server-response: + status: 200 + + proxy-response: + status: 500 From befad54fb16c129ba7621eed8918e22989794899 Mon Sep 17 00:00:00 2001 From: Serris Lew Date: Thu, 10 Nov 2022 10:05:11 -0800 Subject: [PATCH 3/5] Format issues --- tests/gold_tests/dns/dns_host_down.test.py | 8 +++++--- tests/gold_tests/dns/hosts_file | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tests/gold_tests/dns/dns_host_down.test.py b/tests/gold_tests/dns/dns_host_down.test.py index 4c9f6065db6..b68277c6f60 100644 --- a/tests/gold_tests/dns/dns_host_down.test.py +++ b/tests/gold_tests/dns/dns_host_down.test.py @@ -23,10 +23,11 @@ Verify ATS handles down origin servers with cached domain correctly. ''' + class DownCachedOriginServerTest: replay_file = "replay/server_down.replay.yaml" - def __init__(self, host_down = False): + def __init__(self): """Initialize the Test processes for the test runs.""" self._dns_port = None @@ -39,7 +40,7 @@ def _configure_trafficserver(self): ) self._ts.Disk.records_config.update({ - 'proxy.config.diags.debug.enabled': 0, + 'proxy.config.diags.debug.enabled': 1, 'proxy.config.diags.debug.tags': 'hostdb|dns|http|socket', 'proxy.config.http.connect_attempts_max_retries': 0, 'proxy.config.http.connect_attempts_rr_retries': 0, @@ -49,7 +50,7 @@ def _configure_trafficserver(self): 'proxy.config.hostdb.timeout': 10, 'proxy.config.hostdb.lookup_timeout': 2, 'proxy.config.http.transaction_no_activity_timeout_in': 2, - 'proxy.config.hostdb.host_file.interval' : 1, + 'proxy.config.hostdb.host_file.interval': 1, 'proxy.config.hostdb.host_file.path': os.path.join(Test.TestDirectory, "hosts_file"), }) @@ -90,4 +91,5 @@ def run(self): self._test_host_mark_down() self._test_host_unreachable() + DownCachedOriginServerTest().run() diff --git a/tests/gold_tests/dns/hosts_file b/tests/gold_tests/dns/hosts_file index a0f5d2c70e3..aa068875a83 100644 --- a/tests/gold_tests/dns/hosts_file +++ b/tests/gold_tests/dns/hosts_file @@ -1 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + 0.0.0.1 resolve.this.com From 8dfd791e1567e51729ecd0ea17d57e63e298b7dc Mon Sep 17 00:00:00 2001 From: Serris Lew Date: Thu, 10 Nov 2022 15:36:23 -0800 Subject: [PATCH 4/5] Lower timeout limit --- tests/gold_tests/dns/dns_host_down.test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/gold_tests/dns/dns_host_down.test.py b/tests/gold_tests/dns/dns_host_down.test.py index b68277c6f60..a6e3b1dfd35 100644 --- a/tests/gold_tests/dns/dns_host_down.test.py +++ b/tests/gold_tests/dns/dns_host_down.test.py @@ -47,9 +47,10 @@ def _configure_trafficserver(self): 'proxy.config.hostdb.fail.timeout': 10, 'proxy.config.dns.resolv_conf': 'NULL', 'proxy.config.hostdb.ttl_mode': 1, - 'proxy.config.hostdb.timeout': 10, + 'proxy.config.hostdb.timeout': 2, 'proxy.config.hostdb.lookup_timeout': 2, 'proxy.config.http.transaction_no_activity_timeout_in': 2, + 'proxy.config.http.connect_attempts_timeout': 2, 'proxy.config.hostdb.host_file.interval': 1, 'proxy.config.hostdb.host_file.path': os.path.join(Test.TestDirectory, "hosts_file"), }) From 6803661ff31e3c50d15f85b733796e427e5f8486 Mon Sep 17 00:00:00 2001 From: Serris Lew Date: Fri, 11 Nov 2022 16:02:35 -0800 Subject: [PATCH 5/5] Improved autest with comments and additional checks --- tests/gold_tests/dns/dns_host_down.test.py | 24 ++++++++++++------- .../dns/replay/server_down.replay.yaml | 7 ++++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tests/gold_tests/dns/dns_host_down.test.py b/tests/gold_tests/dns/dns_host_down.test.py index a6e3b1dfd35..08865a76bc3 100644 --- a/tests/gold_tests/dns/dns_host_down.test.py +++ b/tests/gold_tests/dns/dns_host_down.test.py @@ -29,7 +29,8 @@ class DownCachedOriginServerTest: def __init__(self): """Initialize the Test processes for the test runs.""" - self._dns_port = None + self._server = Test.MakeVerifierServerProcess("server", DownCachedOriginServerTest.replay_file) + self._configure_trafficserver() def _configure_trafficserver(self): """Configure Traffic Server.""" @@ -59,38 +60,43 @@ def _configure_trafficserver(self): # After request has failed, SM should mark the IP as down def _test_host_mark_down(self): tr = Test.AddTestRun() - self._server = tr.AddVerifierServerProcess("server", DownCachedOriginServerTest.replay_file) - self._configure_trafficserver() tr.Processes.Default.StartBefore(self._server) tr.Processes.Default.StartBefore(self._ts) - tr.DelayStart = 1 tr.AddVerifierClientProcess( "client-1", DownCachedOriginServerTest.replay_file, http_ports=[self._ts.Variables.port], other_args='--keys 1') - tr.NotRunningAfter = self._server - # After host has been marked down from previous test, HostDB should not return # the host as available and DNS lookup should fail. def _test_host_unreachable(self): tr = Test.AddTestRun() - tr.NotRunningBefore = self._server - - tr.DelayStart = 1 tr.AddVerifierClientProcess( "client-2", DownCachedOriginServerTest.replay_file, http_ports=[self._ts.Variables.port], other_args='--keys 2') + # Verify error log marking host down exists + def _test_error_log(self): + tr = Test.AddTestRun() + tr.Processes.Default.Command = ( + os.path.join(Test.Variables.AtsTestToolsDir, 'condwait') + ' 60 1 -f ' + + os.path.join(self._ts.Variables.LOGDIR, 'error.log') + ) + + self._ts.Disk.error_log.Content = Testers.ContainsExpression("/dns/mark/down' marking down", "host should be marked down") + self._ts.Disk.error_log.Content = Testers.ContainsExpression( + "DNS Error: no valid server http://resolve.this.com", "DNS lookup should fail") + def run(self): self._test_host_mark_down() self._test_host_unreachable() + self._test_error_log() DownCachedOriginServerTest().run() diff --git a/tests/gold_tests/dns/replay/server_down.replay.yaml b/tests/gold_tests/dns/replay/server_down.replay.yaml index 34dc7aca9e2..d1dd1e20222 100644 --- a/tests/gold_tests/dns/replay/server_down.replay.yaml +++ b/tests/gold_tests/dns/replay/server_down.replay.yaml @@ -20,6 +20,7 @@ meta: sessions: - transactions: - client-request: + # Delay to allow hostdb to sync external host file delay: 2s method: "GET" version: "1.1" @@ -30,13 +31,16 @@ sessions: - [ X-Request, request ] - [ uuid, 1 ] + # Shouldn't be reached since server IP is unreachable server-response: status: 200 + # Returns 502 since server connection is unreachable proxy-response: status: 502 - client-request: + # Delay to allow hostdb to sync external host file delay: 2s method: "GET" version: "1.1" @@ -47,8 +51,11 @@ sessions: - [ X-Request, request ] - [ uuid, 2 ] + # Shouldn't be reached since server IP is unreachable server-response: status: 200 + # Previous request marked host down so DNS lookup returns unsuccessful + # 500 response indicates no valid server proxy-response: status: 500