From 33d45a24f7dd07a104b2579bdf1f847005573f34 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 16 Feb 2021 09:55:50 -0500 Subject: [PATCH 1/5] Always check the header size before downloading the file. --- synapse/http/client.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 37ccf5ab98f7..b553f5ca3032 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -699,18 +699,6 @@ async def get_file( resp_headers = dict(response.headers.getAllRawHeaders()) - if ( - b"Content-Length" in resp_headers - and max_size - and int(resp_headers[b"Content-Length"][0]) > max_size - ): - logger.warning("Requested URL is too large > %r bytes" % (max_size,)) - raise SynapseError( - 502, - "Requested file is too large > %r bytes" % (max_size,), - Codes.TOO_LARGE, - ) - if response.code > 299: logger.warning("Got %d when downloading %s" % (response.code, url)) raise SynapseError(502, "Got error %d" % (response.code,), Codes.UNKNOWN) @@ -811,6 +799,12 @@ def read_body_with_max_size( Returns: A Deferred which resolves to the length of the read body. """ + # If the Content-Length header gives a size larger than the maximum allowed + # size, do not bother downloading the body. + if max_size is not None: + content_length_headers = response.headers.getRawHeaders(b"Content-Length", None) + if content_length_headers and int(content_length_headers[0]) > max_size: + return defer.fail(BodyExceededMaxSize()) d = defer.Deferred() response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size)) From a21e78f657a43334b3cd5f0a2f8d6ece7a6ac39c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 16 Feb 2021 11:21:55 -0500 Subject: [PATCH 2/5] Use the Twisted provided length. --- synapse/http/client.py | 7 +++---- tests/http/test_client.py | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index b553f5ca3032..a7d177112920 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -56,7 +56,7 @@ ) from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers -from twisted.web.iweb import IAgent, IBodyProducer, IResponse +from twisted.web.iweb import UNKNOWN_LENGTH, IAgent, IBodyProducer, IResponse from synapse.api.errors import Codes, HttpResponseException, SynapseError from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri @@ -801,9 +801,8 @@ def read_body_with_max_size( """ # If the Content-Length header gives a size larger than the maximum allowed # size, do not bother downloading the body. - if max_size is not None: - content_length_headers = response.headers.getRawHeaders(b"Content-Length", None) - if content_length_headers and int(content_length_headers[0]) > max_size: + if max_size is not None and response.length != UNKNOWN_LENGTH: + if response.length > max_size: return defer.fail(BodyExceededMaxSize()) d = defer.Deferred() diff --git a/tests/http/test_client.py b/tests/http/test_client.py index f17c122e9361..5c8457732fce 100644 --- a/tests/http/test_client.py +++ b/tests/http/test_client.py @@ -18,6 +18,7 @@ from twisted.python.failure import Failure from twisted.web.client import ResponseDone +from twisted.web.iweb import UNKNOWN_LENGTH from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size @@ -27,12 +28,12 @@ class ReadBodyWithMaxSizeTests(TestCase): def setUp(self): """Start reading the body, returns the response, result and proto""" - self.response = Mock() + response = Mock(length=UNKNOWN_LENGTH) self.result = BytesIO() - self.deferred = read_body_with_max_size(self.response, self.result, 6) + self.deferred = read_body_with_max_size(response, self.result, 6) # Fish the protocol out of the response. - self.protocol = self.response.deliverBody.call_args[0][0] + self.protocol = response.deliverBody.call_args[0][0] self.protocol.transport = Mock() def _cleanup_error(self): From 0be5ce4cd61e4086898d4e7c142571d64dad321c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 16 Feb 2021 13:09:15 -0500 Subject: [PATCH 3/5] Use unbuffered requests from treq. --- synapse/http/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/http/client.py b/synapse/http/client.py index a7d177112920..3acddb503922 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -407,6 +407,9 @@ async def request( agent=self.agent, data=body_producer, headers=headers, + # Avoid buffering the body in treq since we do not reuse + # response bodies. + unbuffered=True, **self._extra_treq_args, ) # type: defer.Deferred From fb54d3cad0965833ef55860adcbaf35b99ea66e8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 16 Feb 2021 13:40:36 -0500 Subject: [PATCH 4/5] More forcefully close the connection. --- synapse/http/client.py | 4 +++- tests/http/test_client.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 3acddb503922..108a5d208306 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -768,7 +768,9 @@ def dataReceived(self, data: bytes) -> None: # in the meantime. if self.max_size is not None and self.length >= self.max_size: self.deferred.errback(BodyExceededMaxSize()) - self.transport.loseConnection() + # Close the connection (forcefully) since all the data will get + # discarded anyway. + self.transport.abortConnection() def connectionLost(self, reason: Failure) -> None: # If the maximum size was already exceeded, there's nothing to do. diff --git a/tests/http/test_client.py b/tests/http/test_client.py index 5c8457732fce..2d9b733be0ff 100644 --- a/tests/http/test_client.py +++ b/tests/http/test_client.py @@ -89,7 +89,7 @@ def test_additional_data(self): self.protocol.dataReceived(b"1234567890") self.assertIsInstance(self.deferred.result, Failure) self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize) - self.protocol.transport.loseConnection.assert_called_once() + self.protocol.transport.abortConnection.assert_called_once() # More data might have come in. self.protocol.dataReceived(b"1234567890") From ae84efe2cb687a48f3754bdd39ad4b4e038f0000 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 16 Feb 2021 14:20:31 -0500 Subject: [PATCH 5/5] Newsfragment --- changelog.d/9421.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9421.bugfix diff --git a/changelog.d/9421.bugfix b/changelog.d/9421.bugfix new file mode 100644 index 000000000000..b73ed5664ca9 --- /dev/null +++ b/changelog.d/9421.bugfix @@ -0,0 +1 @@ +Reduce the amount of memory used when generating the URL preview of a file that is larger than the `max_spider_size`.