From 736009b973ad44ef4c7185f5f3bfb778903e4316 Mon Sep 17 00:00:00 2001 From: Thomas Jackson Date: Mon, 23 May 2016 09:20:36 -0700 Subject: [PATCH 1/2] TS-3959: requests are retryable only if bytes were sent and there was no connection error The root issue that TS-4328 and TS-3440 were trying to fix was that ATS was too aggressive in retries to origin (namely after the origin had already recieved the request). The previous patches attempted to have ATS not retry assuming bytes were sent on the wire-- sadly TS-4328 actually only checks that we set bytes to be written to the wire. To fix this we simply check if there was a connection failure to origin, if so we'll assume that it is retryable (since connection failures to origin should only be due to errors writing/connecting etc.). --- proxy/http/HttpTransact.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index d4818522b46..077590855b8 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -6473,8 +6473,8 @@ HttpTransact::is_request_valid(State *s, HTTPHdr *incoming_request) bool HttpTransact::is_request_retryable(State *s) { - // If the connection was established-- we cannot retry - if (s->state_machine->server_request_hdr_bytes > 0) { + // If there was no error establishing the connection (and we sent bytes)-- we cannot retry + if (s->current.state != CONNECTION_ERROR && s->state_machine->server_request_hdr_bytes > 0) { return false; } From b67906d1a08cc543f5c768676cef606d152afe16 Mon Sep 17 00:00:00 2001 From: Thomas Jackson Date: Tue, 24 May 2016 08:17:19 -0700 Subject: [PATCH 2/2] Add tests for TS-3959 Specifically, we want to test the race condition between KA resuse ATS side and the origin closing the connection --- ci/tsqa/tests/test_connect_attempts.py | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/ci/tsqa/tests/test_connect_attempts.py b/ci/tsqa/tests/test_connect_attempts.py index 231f94d368e..d52f41e41f9 100644 --- a/ci/tsqa/tests/test_connect_attempts.py +++ b/ci/tsqa/tests/test_connect_attempts.py @@ -111,6 +111,36 @@ def thread_slow_response(sock): sleep_time -= 1 +def thread_slow_close(sock): + ''' + Thread to sleep a decreasing amount of time after the request, before closing + + sleep times: 2 -> 1 -> 0 + ''' + sock.listen(0) + sleep_time = 2 + num_requests = 0 + # poll + while True: + select.select([sock], [], []) + try: + connection, addr = sock.accept() + connection.send(( + 'HTTP/1.1 200 OK\r\n' + 'Content-Length: {body_len}\r\n' + 'Content-Type: text/html; charset=UTF-8\r\n' + 'Connection: close\r\n\r\n{body}'.format(body_len=len(str(num_requests)), body=num_requests) + )) + time.sleep(sleep_time) + connection.close() + num_requests += 1 + except Exception as e: + print 'connection died!', e + pass + if sleep_time > 0: + sleep_time -= 1 + + class TestOriginServerConnectAttempts(helpers.EnvironmentCase): @classmethod def setUpEnv(cls, env): @@ -154,6 +184,11 @@ def _add_sock(name): t.daemon = True t.start() + sock = _add_sock('slow_close') + t = threading.Thread(target=thread_slow_close, args=(sock,)) + t.daemon = True + t.start() + # only add server headers when there weren't any cls.configs['records.config']['CONFIG']['proxy.config.http.response_server_enabled'] = 2 @@ -206,3 +241,10 @@ def test_slow_response(self): ret = requests.get(url) # make sure it worked self.assertEqual(ret.status_code, 504) + + def test_slow_close(self): + '''Verify that we retry connecting to an origin when there is a connection failure''' + url = 'http://127.0.0.1:{0}/slow_close/s'.format(self.configs['records.config']['CONFIG']['proxy.config.http.server_ports']) + ret = requests.get(url) + # make sure it worked + self.assertEqual(ret.status_code, 200)