Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Version 8.12 (Unreleased)
more accurately and will report system errors to the internal logger.
- Added data migration to backfill legacy release data
- Added data migration to backfill legacy commit data
- Allow gzipped/deflated JavaScript artifacts to be uploaded through the API.

SDKs
~~~~
Expand Down
60 changes: 32 additions & 28 deletions src/sentry/lang/javascript/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import base64
import six
import time
import zlib

from django.conf import settings
from django.core.exceptions import SuspiciousOperation
Expand All @@ -18,6 +17,7 @@
from requests.utils import get_encoding_from_headers
from six.moves.urllib.parse import urlparse, urljoin, urlsplit
from libsourcemap import from_json as view_from_json
from urllib3.response import GzipDecoder, DeflateDecoder

# In case SSL is unavailable (light builds) we can't import this here.
try:
Expand All @@ -32,7 +32,6 @@ class ZeroReturnError(Exception):
from sentry.interfaces.stacktrace import Stacktrace
from sentry.models import EventError, Release, ReleaseFile
from sentry.utils.cache import cache
from sentry.utils.files import compress_file
from sentry.utils.hashlib import md5_text
from sentry.utils.http import is_valid_origin
from sentry.utils.strings import truncatechars
Expand Down Expand Up @@ -137,6 +136,17 @@ def trim_line(line, column=0):
return line


# TODO(mattrobenolt): Generalize on this and leverage the urllib3
# decoders inside coreapi as well so we have a unified method for
# handling gzip/deflate decompression. urllib3 is pretty good at this.
def get_content_decoder_from_headers(headers):
    """Return a body decoder matching the ``Content-Encoding`` header.

    :param headers: mapping of lower-cased header names to values
        (callers in this module lower-case keys before calling).
    :returns: a ``GzipDecoder`` for ``gzip``, a ``DeflateDecoder`` for
        ``deflate``, or ``None`` when the body needs no decoding.
    """
    # Normalize: header values may carry stray whitespace and any casing.
    content_encoding = headers.get('content-encoding', '').strip().lower()
    if content_encoding == 'gzip':
        return GzipDecoder()
    if content_encoding == 'deflate':
        return DeflateDecoder()
    # Unknown or absent encoding: callers treat None as "pass body through".
    return None


def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT):
if not source:
return [], '', []
Expand Down Expand Up @@ -218,7 +228,7 @@ def discover_sourcemap(result):


def fetch_release_file(filename, release):
cache_key = 'releasefile:v1:%s:%s' % (
cache_key = 'releasefile:v2:%s:%s' % (
release.id,
md5_text(filename).hexdigest(),
)
Expand Down Expand Up @@ -265,31 +275,36 @@ def fetch_release_file(filename, release):
logger.debug('Found release artifact %r (id=%s, release_id=%s)',
filename, releasefile.id, release.id)
try:
body = []
with metrics.timer('sourcemaps.release_file_read'):
with releasefile.file.getfile() as fp:
z_body, body = compress_file(fp)
for chunk in fp.chunks():
body.append(chunk)
body = b''.join(body)
except Exception as e:
logger.exception(six.text_type(e))
cache.set(cache_key, -1, 3600)
result = None
else:
headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
encoding = get_encoding_from_headers(headers)
result = (headers, body, 200, encoding)
cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
# Handle gzip/deflate compression depending on Content-Encoding header
decoder = get_content_decoder_from_headers(headers)
if decoder:
try:
body = decoder.decompress(body)
except Exception:
raise CannotFetchSource({
'type': EventError.JS_INVALID_SOURCE_ENCODING,
'value': headers.get('content-encoding'),
'url': expose_url(filename),
})
result = (headers, body, 200, get_encoding_from_headers(headers))
cache.set(cache_key, result, 3600)

elif result == -1:
# We cached an error, so normalize
# it down to None
result = None
else:
# Previous caches would be a 3-tuple instead of a 4-tuple,
# so this is being maintained for backwards compatibility
try:
encoding = result[3]
except IndexError:
encoding = None
result = (result[0], zlib.decompress(result[1]), result[2], encoding)

return result

Expand All @@ -313,7 +328,7 @@ def fetch_file(url, project=None, release=None, allow_scraping=True):
else:
result = None

cache_key = 'source:cache:v3:%s' % (
cache_key = 'source:cache:v4:%s' % (
md5_text(url).hexdigest(),
)

Expand All @@ -327,16 +342,6 @@ def fetch_file(url, project=None, release=None, allow_scraping=True):

logger.debug('Checking cache for url %r', url)
result = cache.get(cache_key)
if result is not None:
# Previous caches would be a 3-tuple instead of a 4-tuple,
# so this is being maintained for backwards compatibility
try:
encoding = result[3]
except IndexError:
encoding = None
# We got a cache hit, but the body is compressed, so we
# need to decompress it before handing it off
result = (result[0], zlib.decompress(result[1]), result[2], encoding)

if result is None:
# lock down domains that are problematic
Expand Down Expand Up @@ -438,11 +443,10 @@ def fetch_file(url, project=None, release=None, allow_scraping=True):
raise CannotFetchSource(error)

body = b''.join(contents)
z_body = zlib.compress(body)
headers = {k.lower(): v for k, v in response.headers.items()}
encoding = response.encoding

cache.set(cache_key, (headers, z_body, response.status_code, encoding), 60)
cache.set(cache_key, (headers, body, response.status_code, encoding), 60)
result = (headers, body, response.status_code, encoding)
finally:
if response is not None:
Expand Down
20 changes: 0 additions & 20 deletions src/sentry/utils/files.py

This file was deleted.

121 changes: 121 additions & 0 deletions tests/sentry/lang/javascript/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
import responses
import six
import zlib
from libsourcemap import Token

from mock import patch
Expand Down Expand Up @@ -72,6 +73,126 @@ def test_unicode(self):

assert result == new_result

def test_deflate(self):
    """A deflate-encoded release artifact is decompressed transparently
    by fetch_release_file, and the cache hit returns the same result.

    NOTE(review): the original paste had GitHub review-comment chrome
    interleaved inside the headers dict; reconstructed here as valid code.
    """
    project = self.project
    release = Release.objects.create(
        project=project,
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    file = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Encoding': 'deflate'
        },
    )

    binary_body = unicode_body.encode('utf-8')
    # zlib.compress emits a raw zlib/deflate stream, matching the
    # Content-Encoding header above.
    file.putfile(six.BytesIO(zlib.compress(binary_body)))

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        project=project,
        file=file,
    )

    result = fetch_release_file('file.min.js', release)

    assert type(result[1]) is six.binary_type
    assert result == (
        {'content-type': 'application/json; charset=utf-8', 'content-encoding': 'deflate'},
        binary_body,
        200,
        'utf-8',
    )

    # Second fetch is served from the cache (stored decompressed) and
    # must be identical.
    new_result = fetch_release_file('file.min.js', release)

    assert result == new_result

def test_gzip(self):
    """A gzip-encoded release artifact is decompressed transparently
    by fetch_release_file, and the cache hit returns the same result."""
    project = self.project
    release = Release.objects.create(
        project=project,
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    artifact = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Encoding': 'gzip'
        },
    )

    binary_body = unicode_body.encode('utf-8')
    # wbits = 16 + MAX_WBITS makes zlib emit a gzip header/trailer
    # instead of a raw deflate stream.
    gz = zlib.compressobj(6, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
    payload = gz.compress(binary_body) + gz.flush()
    artifact.putfile(six.BytesIO(payload))

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        project=project,
        file=artifact,
    )

    result = fetch_release_file('file.min.js', release)

    assert type(result[1]) is six.binary_type
    assert result == (
        {'content-type': 'application/json; charset=utf-8', 'content-encoding': 'gzip'},
        binary_body,
        200,
        'utf-8',
    )

    # Second fetch is served from the cache and must be identical.
    new_result = fetch_release_file('file.min.js', release)

    assert result == new_result

def test_garbage_encoding(self):
    """A body that is not actually gzipped despite its Content-Encoding
    header must raise CannotFetchSource rather than return garbage."""
    project = self.project
    release = Release.objects.create(
        project=project,
        organization_id=project.organization_id,
        version='abc',
    )
    release.add_project(project)

    file = File.objects.create(
        name='file.min.js',
        type='release.file',
        headers={
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Encoding': 'gzip'
        },
    )

    # BytesIO requires bytes; the original bare str literal breaks
    # under Python 3.
    file.putfile(six.BytesIO(b'notgzipped'))

    ReleaseFile.objects.create(
        name='file.min.js',
        release=release,
        project=project,
        file=file,
    )

    with pytest.raises(CannotFetchSource):
        fetch_release_file('file.min.js', release)


class FetchFileTest(TestCase):
@responses.activate
Expand Down