diff --git a/CHANGES b/CHANGES index 6ac2d3f8d69cbc..f08c4242d33877 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,7 @@ Version 8.12 (Unreleased) more accurately and will report system errors them to the internal logger. - Added data migration to backfill legacy release data - Added data migration to backfill legacy commit data +- Allow gzipped/deflated JavaScript artifacts to be uploaded through the API. SDKs ~~~~ diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 9f0abb24e6a7cb..f60692ace7e3c1 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -8,7 +8,6 @@ import base64 import six import time -import zlib from django.conf import settings from django.core.exceptions import SuspiciousOperation @@ -18,6 +17,7 @@ from requests.utils import get_encoding_from_headers from six.moves.urllib.parse import urlparse, urljoin, urlsplit from libsourcemap import from_json as view_from_json +from urllib3.response import GzipDecoder, DeflateDecoder # In case SSL is unavailable (light builds) we can't import this here. try: @@ -32,7 +32,6 @@ class ZeroReturnError(Exception): from sentry.interfaces.stacktrace import Stacktrace from sentry.models import EventError, Release, ReleaseFile from sentry.utils.cache import cache -from sentry.utils.files import compress_file from sentry.utils.hashlib import md5_text from sentry.utils.http import is_valid_origin from sentry.utils.strings import truncatechars @@ -137,6 +136,17 @@ def trim_line(line, column=0): return line +# TODO(mattrobenolt): Generalize on this and leverage the urllib3 +# decoders inside coreapi as well so we have a unified method for +# handling gzip/deflate decompression. urllib3 is pretty good at this. 
+def get_content_decoder_from_headers(headers): + content_encoding = headers.get('content-encoding', '').lower() + if content_encoding == 'gzip': + return GzipDecoder() + if content_encoding == 'deflate': + return DeflateDecoder() + + def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT): if not source: return [], '', [] @@ -218,7 +228,7 @@ def discover_sourcemap(result): def fetch_release_file(filename, release): - cache_key = 'releasefile:v1:%s:%s' % ( + cache_key = 'releasefile:v2:%s:%s' % ( release.id, md5_text(filename).hexdigest(), ) @@ -265,31 +275,36 @@ def fetch_release_file(filename, release): logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id) try: + body = [] with metrics.timer('sourcemaps.release_file_read'): with releasefile.file.getfile() as fp: - z_body, body = compress_file(fp) + for chunk in fp.chunks(): + body.append(chunk) + body = b''.join(body) except Exception as e: logger.exception(six.text_type(e)) cache.set(cache_key, -1, 3600) result = None else: headers = {k.lower(): v for k, v in releasefile.file.headers.items()} - encoding = get_encoding_from_headers(headers) - result = (headers, body, 200, encoding) - cache.set(cache_key, (headers, z_body, 200, encoding), 3600) + # Handle gzip/deflate compression depending on Content-Encoding header + decoder = get_content_decoder_from_headers(headers) + if decoder: + try: + body = decoder.decompress(body) + except Exception: + raise CannotFetchSource({ + 'type': EventError.JS_INVALID_SOURCE_ENCODING, + 'value': headers.get('content-encoding'), + 'url': expose_url(filename), + }) + result = (headers, body, 200, get_encoding_from_headers(headers)) + cache.set(cache_key, result, 3600) elif result == -1: # We cached an error, so normalize # it down to None result = None - else: - # Previous caches would be a 3-tuple instead of a 4-tuple, - # so this is being maintained for backwards compatibility - try: - encoding = result[3] - except 
IndexError: - encoding = None - result = (result[0], zlib.decompress(result[1]), result[2], encoding) return result @@ -313,7 +328,7 @@ def fetch_file(url, project=None, release=None, allow_scraping=True): else: result = None - cache_key = 'source:cache:v3:%s' % ( + cache_key = 'source:cache:v4:%s' % ( md5_text(url).hexdigest(), ) @@ -327,16 +342,6 @@ def fetch_file(url, project=None, release=None, allow_scraping=True): logger.debug('Checking cache for url %r', url) result = cache.get(cache_key) - if result is not None: - # Previous caches would be a 3-tuple instead of a 4-tuple, - # so this is being maintained for backwards compatibility - try: - encoding = result[3] - except IndexError: - encoding = None - # We got a cache hit, but the body is compressed, so we - # need to decompress it before handing it off - result = (result[0], zlib.decompress(result[1]), result[2], encoding) if result is None: # lock down domains that are problematic @@ -438,11 +443,10 @@ def fetch_file(url, project=None, release=None, allow_scraping=True): raise CannotFetchSource(error) body = b''.join(contents) - z_body = zlib.compress(body) headers = {k.lower(): v for k, v in response.headers.items()} encoding = response.encoding - cache.set(cache_key, (headers, z_body, response.status_code, encoding), 60) + cache.set(cache_key, (headers, body, response.status_code, encoding), 60) result = (headers, body, response.status_code, encoding) finally: if response is not None: diff --git a/src/sentry/utils/files.py b/src/sentry/utils/files.py deleted file mode 100644 index 1252e0002b124a..00000000000000 --- a/src/sentry/utils/files.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -sentry.utils.files -~~~~~~~~~~~~~~~~~~ - -:copyright: (c) 2010-2014 by the Sentry Team, see AUTHORS for more details. -:license: BSD, see LICENSE for more details. 
-""" -from __future__ import absolute_import - -import zlib - - -def compress_file(fp, level=6): - compressor = zlib.compressobj(level) - z_chunks = [] - chunks = [] - for chunk in fp.chunks(): - chunks.append(chunk) - z_chunks.append(compressor.compress(chunk)) - return (b''.join(z_chunks) + compressor.flush(), b''.join(chunks)) diff --git a/tests/sentry/lang/javascript/test_processor.py b/tests/sentry/lang/javascript/test_processor.py index c5862e2ab4e02d..78d29100b80c70 100644 --- a/tests/sentry/lang/javascript/test_processor.py +++ b/tests/sentry/lang/javascript/test_processor.py @@ -5,6 +5,7 @@ import pytest import responses import six +import zlib from libsourcemap import Token from mock import patch @@ -72,6 +73,126 @@ def test_unicode(self): assert result == new_result + def test_deflate(self): + project = self.project + release = Release.objects.create( + project=project, + organization_id=project.organization_id, + version='abc', + ) + release.add_project(project) + + file = File.objects.create( + name='file.min.js', + type='release.file', + headers={ + 'Content-Type': 'application/json; charset=utf-8', + 'Content-Encoding': 'deflate' + }, + ) + + binary_body = unicode_body.encode('utf-8') + file.putfile(six.BytesIO(zlib.compress(binary_body))) + + ReleaseFile.objects.create( + name='file.min.js', + release=release, + project=project, + file=file, + ) + + result = fetch_release_file('file.min.js', release) + + assert type(result[1]) is six.binary_type + assert result == ( + {'content-type': 'application/json; charset=utf-8', 'content-encoding': 'deflate'}, + binary_body, + 200, + 'utf-8', + ) + + # test with cache hit, which should be compressed + new_result = fetch_release_file('file.min.js', release) + + assert result == new_result + + def test_gzip(self): + project = self.project + release = Release.objects.create( + project=project, + organization_id=project.organization_id, + version='abc', + ) + release.add_project(project) + + file = 
File.objects.create( + name='file.min.js', + type='release.file', + headers={ + 'Content-Type': 'application/json; charset=utf-8', + 'Content-Encoding': 'gzip' + }, + ) + + binary_body = unicode_body.encode('utf-8') + compressor = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS) + file.putfile(six.BytesIO(b''.join([ + compressor.compress(binary_body), + compressor.flush(), + ]))) + + ReleaseFile.objects.create( + name='file.min.js', + release=release, + project=project, + file=file, + ) + + result = fetch_release_file('file.min.js', release) + + assert type(result[1]) is six.binary_type + assert result == ( + {'content-type': 'application/json; charset=utf-8', 'content-encoding': 'gzip'}, + binary_body, + 200, + 'utf-8', + ) + + # test with cache hit, which should be compressed + new_result = fetch_release_file('file.min.js', release) + + assert result == new_result + + def test_garbage_encoding(self): + project = self.project + release = Release.objects.create( + project=project, + organization_id=project.organization_id, + version='abc', + ) + release.add_project(project) + + file = File.objects.create( + name='file.min.js', + type='release.file', + headers={ + 'Content-Type': 'application/json; charset=utf-8', + 'Content-Encoding': 'gzip' + }, + ) + + file.putfile(six.BytesIO('notgzipped')) + + ReleaseFile.objects.create( + name='file.min.js', + release=release, + project=project, + file=file, + ) + + with pytest.raises(CannotFetchSource): + fetch_release_file('file.min.js', release) + class FetchFileTest(TestCase): @responses.activate